
Merge remote-tracking branch 'upstream/develop' into develop-flat-code-restructure

/develop-generalizationTraining-TrainerController
Deric Pang committed 6 years ago
Current commit
cdb41480
43 files changed, with 581 additions and 56 deletions
  1. MLAgentsSDK/Assets/ML-Agents/Scripts/Agent.cs (64)
  2. MLAgentsSDK/Assets/ML-Agents/Scripts/Batcher.cs (9)
  3. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentActionProto.cs.meta (2)
  4. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentInfoProto.cs (29)
  5. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentInfoProto.cs.meta (2)
  6. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/BrainParametersProto.cs.meta (2)
  7. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/BrainTypeProto.cs.meta (2)
  8. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/CommandProto.cs.meta (2)
  9. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/EngineConfigurationProto.cs.meta (2)
  10. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/EnvironmentParametersProto.cs.meta (2)
  11. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/Header.cs.meta (2)
  12. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/ResolutionProto.cs.meta (2)
  13. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/SpaceTypeProto.cs.meta (2)
  14. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityInput.cs.meta (2)
  15. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityMessage.cs.meta (2)
  16. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityOutput.cs.meta (2)
  17. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInitializationInput.cs.meta (2)
  18. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInitializationOutput.cs.meta (2)
  19. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInput.cs.meta (2)
  20. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlOutput.cs.meta (2)
  21. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityToExternal.cs.meta (2)
  22. MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityToExternalGrpc.cs.meta (2)
  23. MLAgentsSDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (44)
  24. docs/Learning-Environment-Design-Agents.md (24)
  25. docs/localized/zh-CN/README.md (5)
  26. python/mlagents/mlagents/envs/brain.py (3)
  27. python/mlagents/mlagents/envs/communicator_objects/agent_info_proto_pb2.py (11)
  28. python/mlagents/mlagents/envs/environment.py (10)
  29. python/mlagents/mlagents/trainers/bc/models.py (4)
  30. python/mlagents/mlagents/trainers/bc/trainer.py (6)
  31. python/mlagents/mlagents/trainers/models.py (21)
  32. python/mlagents/mlagents/trainers/ppo/trainer.py (10)
  33. python/mlagents/tests/trainers/test_bc.py (6)
  34. python/mlagents/tests/trainers/test_ppo.py (15)
  35. MLAgentsSDK/Assets/ML-Agents/Editor/Tests.meta (8)
  36. MLAgentsSDK/Assets/ML-Agents/Scripts/ActionMasker.cs (154)
  37. MLAgentsSDK/Assets/ML-Agents/Scripts/ActionMasker.cs.meta (3)
  38. MLAgentsSDK/Assets/ML-Agents/Editor/Tests/EditModeTestActionMasker.cs (139)
  39. MLAgentsSDK/Assets/ML-Agents/Editor/Tests/EditModeTestActionMasker.cs.meta (11)
  40. MLAgentsSDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs.meta (11)
  41. MLAgentsSDK/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs.meta (12)
  42. /MLAgentsSDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (0)

64
MLAgentsSDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using System.Linq;
using UnityEngine;

/// Keeps track of the last text action taken by the Brain.
/// </summary>
public string storedTextActions;
/// <summary>
/// For discrete control, specifies the actions that the agent cannot take. Is true if
/// the action is masked.
/// </summary>
public bool[] actionMasks;
/// <summary>
/// Used by the Trainer to store information about the agent. This data

/// to separate between different agents in the environment.
int id;
/// Keeps track of the actions that are masked at each step.
private ActionMasker actionMasker;
/// Array of Texture2D used to render to from render buffer before
/// transforming into float tensor.
Texture2D[] textureArray;

}
BrainParameters param = brain.brainParameters;
actionMasker = new ActionMasker(param);
if (param.vectorActionSpaceType == SpaceType.continuous)
{
action.vectorActions = new float[param.vectorActionSize[0]];

info.storedVectorActions = action.vectorActions;
info.storedTextActions = action.textActions;
info.vectorObservation.Clear();
actionMasker.ResetMask();
info.actionMasks = actionMasker.GetMask();
BrainParameters param = brain.brainParameters;
if (info.vectorObservation.Count != param.vectorObservationSize)

{
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="actionIndices">The indices of the masked actions on branch 0</param>
protected void SetActionMask(IEnumerable<int> actionIndices)
{
actionMasker.SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="actionIndex">The index of the masked action on branch 0</param>
protected void SetActionMask(int actionIndex)
{
actionMasker.SetActionMask(0, new int[1]{actionIndex});
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndex">The index of the masked action</param>
protected void SetActionMask(int branch, int actionIndex)
{
actionMasker.SetActionMask(branch, new int[1]{actionIndex});
}
/// <summary>
/// Modifies an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndices">The indices of the masked actions</param>
protected void SetActionMask(int branch, IEnumerable<int> actionIndices)
{
actionMasker.SetActionMask(branch, actionIndices);
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.

9
MLAgentsSDK/Assets/ML-Agents/Scripts/Batcher.cs


{
StackedVectorObservation = { info.stackedVectorObservation },
StoredVectorActions = { info.storedVectorActions },
Memories = { info.memories },
StoredTextActions = info.storedTextActions,
TextObservation = info.textObservation,
Reward = info.reward,

};
if (info.memories != null)
{
agentInfoProto.Memories.Add(info.memories);
}
if (info.actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(info.actionMasks);
}
foreach (Texture2D obs in info.visualObservations)
{
agentInfoProto.VisualObservations.Add(

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentActionProto.cs.meta


fileFormatVersion: 2
guid: 93eec67e32dc3484ca9b8e3ea98909c7
guid: 4482f127d4a874cf8a11da2b2cc27dc9
MonoImporter:
externalObjects: {}
serializedVersion: 2

29
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentInfoProto.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjltbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2lu",
"Zm9fcHJvdG8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzIv0BCg5BZ2Vu",
"Zm9fcHJvdG8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzIpICCg5BZ2Vu",
"Y2hlZBgJIAEoCBIKCgJpZBgKIAEoBUIfqgIcTUxBZ2VudHMuQ29tbXVuaWNh",
"dG9yT2JqZWN0c2IGcHJvdG8z"));
"Y2hlZBgJIAEoCBIKCgJpZBgKIAEoBRITCgthY3Rpb25fbWFzaxgLIAMoCEIf",
"qgIcTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "VisualObservations", "TextObservation", "StoredVectorActions", "StoredTextActions", "Memories", "Reward", "Done", "MaxStepReached", "Id" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "VisualObservations", "TextObservation", "StoredVectorActions", "StoredTextActions", "Memories", "Reward", "Done", "MaxStepReached", "Id", "ActionMask" }, null, null, null)
}));
}
#endregion

done_ = other.done_;
maxStepReached_ = other.maxStepReached_;
id_ = other.id_;
actionMask_ = other.actionMask_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "action_mask" field.</summary>
public const int ActionMaskFieldNumber = 11;
private static readonly pb::FieldCodec<bool> _repeated_actionMask_codec
= pb::FieldCodec.ForBool(90);
private readonly pbc::RepeatedField<bool> actionMask_ = new pbc::RepeatedField<bool>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<bool> ActionMask {
get { return actionMask_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as AgentInfoProto);

if (Done != other.Done) return false;
if (MaxStepReached != other.MaxStepReached) return false;
if (Id != other.Id) return false;
if(!actionMask_.Equals(other.actionMask_)) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (Done != false) hash ^= Done.GetHashCode();
if (MaxStepReached != false) hash ^= MaxStepReached.GetHashCode();
if (Id != 0) hash ^= Id.GetHashCode();
hash ^= actionMask_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteRawTag(80);
output.WriteInt32(Id);
}
actionMask_.WriteTo(output, _repeated_actionMask_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

if (Id != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(Id);
}
size += actionMask_.CalculateSize(_repeated_actionMask_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.Id != 0) {
Id = other.Id;
}
actionMask_.Add(other.actionMask_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 80: {
Id = input.ReadInt32();
break;
}
case 90:
case 88: {
actionMask_.AddEntriesFrom(input, _repeated_actionMask_codec);
break;
}
}

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/AgentInfoProto.cs.meta


fileFormatVersion: 2
guid: 9a2cd47d5b7a84d45b66748c405edf5a
guid: 791522439b8324bff85f84309db90ecc
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/BrainParametersProto.cs.meta


fileFormatVersion: 2
guid: 91e3353985a4c4c08a8004648a81de4f
guid: 7b41acc4d406e4a3c94df3399b2a6471
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/BrainTypeProto.cs.meta


fileFormatVersion: 2
guid: d2e4f3cea300049b7a4cd65fbee2ee95
guid: 8a44faf5235584f06ae45eb976a247a9
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/CommandProto.cs.meta


fileFormatVersion: 2
guid: 19e8be280f78249c188fde36f0855094
guid: 6b2ff9fe2c38b4e79aba78908cc5492c
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/EngineConfigurationProto.cs.meta


fileFormatVersion: 2
guid: fac934345fc664df8823b494ea9b1ca8
guid: 2cebeb1263d7846b4b3c7c6e5d5e193f
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/EnvironmentParametersProto.cs.meta


fileFormatVersion: 2
guid: 312dc062dfab44416a31b8b273cda29a
guid: be8c5f75bdcff41488a8e85748541100
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/Header.cs.meta


fileFormatVersion: 2
guid: e582b089dfedc438d9cbce9d4017b807
guid: 8bb8aabfab48b408381733bccccd5af9
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/ResolutionProto.cs.meta


fileFormatVersion: 2
guid: ca2454611610e4136a412b5cd6afee4d
guid: eae234f817240444a9d18b3d7366f260
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/SpaceTypeProto.cs.meta


fileFormatVersion: 2
guid: bf7e44e20999448ef846526541819077
guid: 3e61637749b07412284363ff304da763
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityInput.cs.meta


fileFormatVersion: 2
guid: c97e6e2cde58d404cba31008c0489454
guid: 25e46cd9eca204e19a08fa938802ef9d
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityMessage.cs.meta


fileFormatVersion: 2
guid: 10dca984632854b079476d5fb6df329c
guid: d270bf9ce3d564bb48b2095802c15ff9
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityOutput.cs.meta


fileFormatVersion: 2
guid: 546f38fe479d240eabdf11ac55ecf7d4
guid: 5b7166f97831f45ef86df5eed0042240
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInitializationInput.cs.meta


fileFormatVersion: 2
guid: d9c1712ba119a47458082c7190c838b0
guid: 6c81750abd5a9432babe1834534122c0
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInitializationOutput.cs.meta


fileFormatVersion: 2
guid: cfac266f05f674dbd8dc50e8e9b29753
guid: f7ac9dd525a2246688054b2442eda28a
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlInput.cs.meta


fileFormatVersion: 2
guid: 0283aaaebbbaf4c438db36396a5e3885
guid: 24680ffa432734c09b4660d82303cbd2
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityRlOutput.cs.meta


fileFormatVersion: 2
guid: a6665911e84e24b7e970f63662f55713
guid: af13b8fefefa74a948934dd273f94c4a
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityToExternal.cs.meta


fileFormatVersion: 2
guid: 553c6b5d2feba4ef69206f0e0a2a92a3
guid: 199e76fc828bc4561abad51402438e07
MonoImporter:
externalObjects: {}
serializedVersion: 2

2
MLAgentsSDK/Assets/ML-Agents/Scripts/CommunicatorObjects/UnityToExternalGrpc.cs.meta


fileFormatVersion: 2
guid: d3ea7da815b0b4d938c13e621f57db04
guid: 0378b2871a6c649f69c2f32d5c0fb045
MonoImporter:
externalObjects: {}
serializedVersion: 2

44
MLAgentsSDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


/// Modify only in inspector : Name of the previous action node
public string PreviousActionPlaceholderName = "prev_action";
/// Name of the action mask node
private string ActionMaskPlaceholderName = "action_masks";
#if ENABLE_TENSORFLOW
TFGraph graph;
TFSession session;

bool hasPrevAction;
bool hasMaskedActions;
float[,] maskedActions;
List<Texture2D> texturesHolder;
int memorySize;
#endif

{
hasValueEstimate = true;
}
if (graph[graphScope + ActionMaskPlaceholderName] != null)
{
hasMaskedActions = true;
}
observationMatrixList = new List<float[,,,]>();
texturesHolder = new List<Texture2D>();

i++;
}
}
if (hasMaskedActions)
{
maskedActions = new float[
currentBatchSize,
brain.brainParameters.vectorActionSize.Sum()
];
var i = 0;
foreach (Agent agent in agentList)
{
for (int j = 0; j < brain.brainParameters.vectorActionSize.Sum(); j++)
{
if (agentInfo[agent].actionMasks != null)
{
maskedActions[i, j] = agentInfo[agent].actionMasks[j] ? 0.0f : 1.0f;
}
else
{
maskedActions[i, j] = 1.0f;
}
}
i++;
}
}
observationMatrixList.Clear();
for (int observationIndex =
0;

runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
}
// Create the mask action tensor
if (hasMaskedActions)
{
runner.AddInput(graph[graphScope + ActionMaskPlaceholderName][0], maskedActions);
}
// Create the observation tensors
for (int obsNumber =
0;

24
docs/Learning-Environment-Design-Agents.md


Note that the above code example is a simplified extract from the AreaAgent class, which provides alternate implementations for both the discrete and the continuous action spaces.
#### Masking Discrete Actions
When using Discrete Actions, it is possible to specify that some actions are impossible for the next decision. When the agent is controlled by an External or Internal Brain, the agent will be unable to perform the specified action. Note that when the agent is controlled by a Player or Heuristic Brain, the agent will still be able to decide to perform the masked action. In order to mask an action, call the method `SetActionMask` within the `CollectObservations()` method:
```csharp
SetActionMask(branch, actionIndices)
```
Where:
* `branch` is the index (starting at 0) of the branch on which you want to mask the action
* `actionIndices` is a list of `int` or a single `int` corresponding to the index of the action that the agent cannot perform.
For example, if you have an agent with 2 branches, and on the first branch (branch 0) there are 4 possible actions: _"do nothing"_, _"jump"_, _"shoot"_ and _"change weapon"_, then with the code below, the agent will either _"do nothing"_ or _"change weapon"_ for its next decision (since action indices 1 and 2 are masked).
```csharp
SetActionMask(0, new int[2]{1,2})
```
Notes:
* You can call `SetActionMask` multiple times if you want to put masks on multiple branches.
* You cannot mask all the actions of a branch.
* You cannot mask actions in continuous control.
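As an illustration, here is a minimal sketch of how this might look inside an agent's `CollectObservations()` method. The `RobotAgent` class name and the `isReloading` flag are hypothetical and only serve the example; `AddVectorObs` and `SetActionMask` are the Agent methods shown above.
```csharp
public class RobotAgent : Agent
{
    // Hypothetical gameplay state used only for this example.
    public bool isReloading;

    public override void CollectObservations()
    {
        AddVectorObs(isReloading ? 1f : 0f);

        // Branch 0 actions: 0 = do nothing, 1 = jump, 2 = shoot, 3 = change weapon.
        if (isReloading)
        {
            // Mask "jump" (1) and "shoot" (2) for the next decision.
            SetActionMask(0, new int[2] {1, 2});
        }
    }
}
```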
## Rewards
In reinforcement learning, the reward is a signal that the agent has done something right. The PPO reinforcement learning algorithm works by optimizing the choices an agent makes such that the agent earns the highest cumulative reward over time. The better your reward mechanism, the better your agent will learn.

5
docs/localized/zh-CN/README.md


the [Contribution Guidelines](/CONTRIBUTING.md) and
the [Code of Conduct](/CODE_OF_CONDUCT.md).
Through Unity Connect and GitHub, you can communicate with us
and with the wider community:
Through Unity Connect and GitHub, you can communicate with us and with the wider community:
* Join our
[Unity Machine Learning channel](https://connect.unity.com/messages/c/035fba4f88400000)
to connect with others using ML-Agents and with Unity developers who are passionate about machine learning

Make sure to provide as much detail as possible.
For any other questions or feedback, please contact the ML-Agents team directly,
at ml-agents@unity3d.com.
by email at ml-agents@unity3d.com.
## License

3
python/mlagents/mlagents/envs/brain.py


class BrainInfo:
def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
reward=None, agents=None, local_done=None,
vector_action=None, text_action=None, max_reached=None):
vector_action=None, text_action=None, max_reached=None, action_mask=None):
"""
Describes experience at current step of all agents linked to a brain.
"""

self.agents = agents
self.previous_vector_actions = vector_action
self.previous_text_actions = text_action
self.action_masks = action_mask
AllBrainInfo = Dict[str, BrainInfo]

11
python/mlagents/mlagents/envs/communicator_objects/agent_info_proto_pb2.py


package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n9mlagents/envs/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\"\xfd\x01\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n9mlagents/envs/communicator_objects/agent_info_proto.proto\x12\x14\x63ommunicator_objects\"\x92\x02\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x1b\n\x13visual_observations\x18\x02 \x03(\x0c\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=10,
number=11, type=8, cpp_type=7, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=84,
serialized_end=337,
serialized_end=358,
)
DESCRIPTOR.message_types_by_name['AgentInfoProto'] = _AGENTINFOPROTO

10
python/mlagents/mlagents/envs/environment.py


else:
[x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
memory = np.array([x.memories for x in agent_info_list])
total_num_actions = sum(self.brains[b].vector_action_space_size)
mask_actions = np.ones((len(agent_info_list), total_num_actions))
for agent_index, agent_info in enumerate(agent_info_list):
if agent_info.action_mask is not None:
if len(agent_info.action_mask) == total_num_actions:
mask_actions[agent_index, :] = [
0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
if any([np.isnan(x.reward) for x in agent_info_list]):
logger.warning("An agent had a NaN reward for brain "+b)
if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):

local_done=[x.done for x in agent_info_list],
vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
text_action=[x.stored_text_actions for x in agent_info_list],
max_reached=[x.max_step_reached for x in agent_info_list]
max_reached=[x.max_step_reached for x in agent_info_list],
action_mask=mask_actions
)
return _data, global_done

4
python/mlagents/mlagents/trainers/bc/models.py


kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
self.action_probs = tf.concat(
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.sample_action_float = tf.concat([tf.multinomial(branch, 1) for branch in policy_branches], axis=1)
self.action_masks = tf.placeholder(shape=[None, sum(self.a_size)], dtype=tf.float32, name="action_masks")
self.sample_action_float = self.create_discrete_action_masking_layer(
policy_branches, self.action_masks, self.a_size)
self.sample_action_float = tf.identity(self.sample_action_float, name="action")
self.sample_action = tf.cast(self.sample_action_float, tf.int32)
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")

6
python/mlagents/mlagents/trainers/bc/trainer.py


feed_dict[self.model.visual_in[i]] = agent_brain.visual_observations[i]
if self.use_vector_observations:
feed_dict[self.model.vector_in] = agent_brain.vector_observations
if not self.is_continuous_action:
feed_dict[self.model.action_masks] = agent_brain.action_masks
if self.use_recurrent:
if agent_brain.memories.shape[1] == 0:
agent_brain.memories = np.zeros((len(agent_brain.agents), self.m_size))

:param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
:param take_action_outputs: The outputs of the take action method.
"""
# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]

for i, _ in enumerate(self.model.visual_in):
_obs = np.array(_buffer['visual_observations%d' % i][start:end])
feed_dict[self.model.visual_in[i]] = _obs
if not self.is_continuous_action:
feed_dict[self.model.action_masks] = np.ones(
(self.n_sequences, sum(self.brain.vector_action_space_size)))
if self.use_recurrent:
feed_dict[self.model.memory_in] = np.zeros([self.n_sequences, self.m_size])
loss, _ = self.sess.run([self.model.loss, self.model.update], feed_dict=feed_dict)

21
python/mlagents/mlagents/trainers/models.py


num_layers, scope, reuse)
return hidden_flat
@staticmethod
def create_discrete_action_masking_layer(branches_logits, action_masks, action_size):
"""
Creates a masking layer for the discrete actions
:param branches_logits: A list of the unnormalized action probabilities for each branch
:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches]
"""
action_idx = [0] + list(np.cumsum(action_size))
branch_masks = [action_masks[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]
raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]), branch_masks[k])
for k in range(len(action_size))]
normalized_probs = [tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
for k in range(len(action_size))]
output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1)
return output
def create_observation_streams(self, num_streams, h_size, num_layers):
"""
Creates encoding stream for observations.

self.all_log_probs = tf.concat([branch for branch in policy_branches], axis=1, name="action_probs")
output = tf.concat([tf.multinomial(branch, 1) for branch in policy_branches], axis=1)
self.action_masks = tf.placeholder(shape=[None, sum(self.a_size)], dtype=tf.float32, name="action_masks")
output = self.create_discrete_action_masking_layer(policy_branches, self.action_masks, self.a_size)
self.output = tf.identity(output, name="action")

10
python/mlagents/mlagents/trainers/ppo/trainer.py


class PPOTrainer(Trainer):
"""The PPOTrainer is an implementation of the PPO algorithm."""
action_masking_name = 'action_masks'
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
"""
Responsible for collecting experiences and training PPO model.

feed_dict[self.model.visual_in[i]] = curr_brain_info.visual_observations[i]
if self.use_vector_obs:
feed_dict[self.model.vector_in] = curr_brain_info.vector_observations
if not self.is_continuous_action:
feed_dict[self.model.action_masks] = curr_brain_info.action_masks
values = self.sess.run(self.inference_run_list, feed_dict=feed_dict)
run_out = dict(zip(self.inference_run_list, values))

if self.is_continuous_action:
actions_pre = stored_take_action_outputs[self.model.output_pre]
self.training_buffer[agent_id]['actions_pre'].append(actions_pre[idx])
else:
self.training_buffer[agent_id][self.action_masking_name].append(stored_info.action_masks[idx])
a_dist = stored_take_action_outputs[self.model.all_log_probs]
value = stored_take_action_outputs[self.model.value]
self.training_buffer[agent_id]['actions'].append(actions[idx])

if self.use_recurrent:
feed_dict[self.model.prev_action] = np.array(buffer['prev_action'][start:end]).reshape(
[-1, len(self.brain.vector_action_space_size)])
feed_dict[self.model.action_masks] = np.array(buffer[self.action_masking_name][start:end]).reshape(
[-1, sum(self.brain.vector_action_space_size)]
)
if self.use_vector_obs:
total_observation_length = self.brain.vector_observation_space_size * \
self.brain.num_stacked_vector_observations

6
python/mlagents/tests/trainers/test_bc.py


model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2,2])}
sess.run(run_list, feed_dict=feed_dict)
env.close()

model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3])}
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2,2])}
sess.run(run_list, feed_dict=feed_dict)
env.close()

15
python/mlagents/tests/trainers/test_ppo.py


model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3])
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2,2])
}
sess.run(run_list, feed_dict=feed_dict)
env.close()

feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([2,2])}
sess.run(run_list, feed_dict=feed_dict)
env.close()

model.prev_action: [[0], [0]],
model.memory_in: np.zeros((1, memory_size)),
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
[3, 4, 5, 3, 4, 5]]),
model.action_masks: np.ones([1,2])}
sess.run(run_list, feed_dict=feed_dict)
env.close()

[3, 4, 5, 3, 4, 5]]),
model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.action_holder: [[0], [0]]}
model.action_holder: [[0], [0]],
model.action_masks: np.ones([2,2])}
sess.run(run_list, feed_dict=feed_dict)
env.close()

model.visual_in[0]: np.ones([2, 40, 30, 3]),
model.visual_in[1]: np.ones([2, 40, 30, 3]),
model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
model.next_visual_in[1]: np.ones([2, 40, 30, 3])
model.next_visual_in[1]: np.ones([2, 40, 30, 3]),
model.action_masks: np.ones([2,2])
}
sess.run(run_list, feed_dict=feed_dict)
env.close()

8
MLAgentsSDK/Assets/ML-Agents/Editor/Tests.meta


fileFormatVersion: 2
guid: 172fcc71d343247a9a91d5b54dd21cd6
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

154
MLAgentsSDK/Assets/ML-Agents/Scripts/ActionMasker.cs


using System;
using System.Collections.Generic;
using System.Linq;
namespace MLAgents
{
public class ActionMasker
{
/// When using discrete control, holds the starting indices of the actions
/// when all the branches are concatenated with each other.
private int[] _startingActionIndices;
private bool[] _currentMask;
private readonly BrainParameters _brainParameters;
public ActionMasker(BrainParameters brainParameters)
{
this._brainParameters = brainParameters;
}
/// <summary>
/// Modifies an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndices">The indices of the masked actions</param>
public void SetActionMask(int branch, IEnumerable<int> actionIndices)
{
// If the branch does not exist, raise an error
if (branch >= _brainParameters.vectorActionSize.Length )
throw new UnityAgentsException(
"Invalid Action Masking : Branch "+branch+" does not exist.");
int totalNumberActions = _brainParameters.vectorActionSize.Sum();
// By default, the masks are null. If we want to specify a new mask, we initialize
// the actionMasks with trues.
if (_currentMask == null)
{
_currentMask = new bool[totalNumberActions];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (_startingActionIndices == null)
{
_startingActionIndices = CreateActionStartinIndices();
}
// Perform the masking
foreach (var actionIndex in actionIndices)
{
if (actionIndex >= _brainParameters.vectorActionSize[branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
_currentMask[actionIndex + _startingActionIndices[branch]] = true;
}
}
/// <summary>
/// Get the current mask for an agent
/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
public bool[] GetMask()
{
AssertMask();
return _currentMask;
}
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
private void AssertMask()
{
// Action Masks can only be used in Discrete Control.
if (_brainParameters.vectorActionSpaceType != SpaceType.discrete)
{
throw new UnityAgentsException(
"Invalid Action Masking : Can only set action mask for Discrete Control.");
}
var numBranches = _brainParameters.vectorActionSize.Length;
for (var branchIndex = 0 ; branchIndex < numBranches; branchIndex++ )
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
}
/// <summary>
/// Resets the current mask for an agent
/// </summary>
public void ResetMask()
{
if (_currentMask != null)
{
Array.Clear(_currentMask, 0, _currentMask.Length);
}
}
/// <summary>
/// Generates an array containing the starting indices of each branch in the vector action,
/// computed as a cumulative sum of the branch sizes.
/// </summary>
/// <returns></returns>
private int[] CreateActionStartinIndices()
{
var vectorActionSize = _brainParameters.vectorActionSize;
var runningSum = 0;
var result = new int[vectorActionSize.Length + 1];
for (var actionIndex = 0;
actionIndex < vectorActionSize.Length; actionIndex++)
{
runningSum += vectorActionSize[actionIndex];
result[actionIndex + 1] = runningSum;
}
return result;
}
/// <summary>
/// Checks if all the actions in the input branch are masked
/// </summary>
/// <param name="branch"> The index of the branch to check</param>
/// <returns> True if all the actions of the branch are masked</returns>
private bool AreAllActionsMasked(int branch)
{
if (_currentMask == null)
{
return false;
}
var start = _startingActionIndices[branch];
var end = _startingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!_currentMask[i])
{
return false;
}
}
return true;
}
}
}
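To illustrate the flat indexing this class produces (using the same branch sizes as the tests below, {4, 5, 6}): the starting indices are {0, 4, 9}, so masking action 2 on branch 1 marks element 4 + 2 = 6 of a 15-element mask. A hedged usage sketch, relying only on the members shown above:
```csharp
// Sketch: how ActionMasker concatenates branches into one flat boolean mask.
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};   // branches start at flat indices 0, 4, 9

var masker = new ActionMasker(bp);
masker.SetActionMask(1, new int[1] {2});      // branch 1, action 2 -> flat index 4 + 2 = 6

bool[] mask = masker.GetMask();               // length 15; only mask[6] is true
masker.ResetMask();                           // clears the mask before the next decision
```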

3
MLAgentsSDK/Assets/ML-Agents/Scripts/ActionMasker.cs.meta


fileFormatVersion: 2
guid: 8a0ec4ccf4ee450da7766f65228d5460
timeCreated: 1534530911

139
MLAgentsSDK/Assets/ML-Agents/Editor/Tests/EditModeTestActionMasker.cs


using NUnit.Framework;
namespace MLAgents.Tests
{
public class EditModeTestActionMasker
{
[Test]
public void Contruction()
{
var bp = new BrainParameters();
var masker = new ActionMasker(bp);
Assert.IsNotNull(masker);
}
[Test]
public void FailsWithContinuous()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.continuous;
bp.vectorActionSize = new int[1] {4};
var masker = new ActionMasker(bp);
masker.SetActionMask(0, new int[1] {0});
Assert.Catch<UnityAgentsException>(() => masker.GetMask());
}
[Test]
public void NullMask()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
var masker = new ActionMasker(bp);
var mask = masker.GetMask();
Assert.IsNull(mask);
}
[Test]
public void FirstBranchMask()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};
var masker = new ActionMasker(bp);
var mask = masker.GetMask();
Assert.IsNull(mask);
masker.SetActionMask(0, new int[]{1,2,3});
mask = masker.GetMask();
Assert.IsFalse(mask[0]);
Assert.IsTrue(mask[1]);
Assert.IsTrue(mask[2]);
Assert.IsTrue(mask[3]);
Assert.IsFalse(mask[4]);
Assert.AreEqual(mask.Length, 15);
}
[Test]
public void SecondBranchMask()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};
var masker = new ActionMasker(bp);
bool[] mask = masker.GetMask();
masker.SetActionMask(1, new int[]{1,2,3});
mask = masker.GetMask();
Assert.IsFalse(mask[0]);
Assert.IsFalse(mask[4]);
Assert.IsTrue(mask[5]);
Assert.IsTrue(mask[6]);
Assert.IsTrue(mask[7]);
Assert.IsFalse(mask[8]);
Assert.IsFalse(mask[9]);
}
[Test]
public void MaskReset()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};
var masker = new ActionMasker(bp);
var mask = masker.GetMask();
masker.SetActionMask(1, new int[3]{1,2,3});
mask = masker.GetMask();
masker.ResetMask();
mask = masker.GetMask();
for (var i = 0; i < 15; i++)
{
Assert.IsFalse(mask[i]);
}
}
[Test]
public void ThrowsError()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};
var masker = new ActionMasker(bp);
Assert.Catch<UnityAgentsException>(
() => masker.SetActionMask(0, new int[1]{5}));
Assert.Catch<UnityAgentsException>(
() => masker.SetActionMask(1, new int[1]{5}));
masker.SetActionMask(2, new int[1] {5});
Assert.Catch<UnityAgentsException>(
() => masker.SetActionMask(3, new int[1]{1}));
masker.GetMask();
masker.ResetMask();
masker.SetActionMask(0, new int[4] {0, 1, 2, 3});
Assert.Catch<UnityAgentsException>(
() => masker.GetMask());
}
[Test]
public void MultipleMaskEdit()
{
var bp = new BrainParameters();
bp.vectorActionSpaceType = SpaceType.discrete;
bp.vectorActionSize = new int[3] {4, 5, 6};
var masker = new ActionMasker(bp);
masker.SetActionMask(0, new int[2] {0, 1});
masker.SetActionMask(0, new int[1] {3});
masker.SetActionMask(2, new int[1] {1});
var mask = masker.GetMask();
for (var i = 0; i < 15; i++)
{
if ((i == 0) || (i == 1) || (i == 3)|| (i == 10))
{
Assert.IsTrue(mask[i]);
}
else
{
Assert.IsFalse(mask[i]);
}
}
}
}
}

11
MLAgentsSDK/Assets/ML-Agents/Editor/Tests/EditModeTestActionMasker.cs.meta


fileFormatVersion: 2
guid: 2e2810ee6c8c64fb39abdf04b5d17f50
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

11
MLAgentsSDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs.meta


fileFormatVersion: 2
guid: 3170fcbfa5f4d4a8ca82c50c750e9083
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

12
MLAgentsSDK/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs.meta


fileFormatVersion: 2
guid: 64f5b117b5f304a4281f16eb904311fd
timeCreated: 1518706577
licenseType: Free
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

/MLAgentsSDK/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs → /MLAgentsSDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
