浏览代码
New Learning Brain (#1303)
New Learning Brain (#1303)
* Initial Commit * attempt at refactor * Put all static methods into the CoreInternalBrain * improvements * more testing * modifications * renamed epsilon * misc * Now supports discrete actions * added discrete support and RNN and visual. Left to do is refactor and save variables into models * code cleaning * made a tensor generator and applier * fix on the models.py file * Moved the Checks to a different Class * Added some unit tests * BugFix * Need to generate the output tensors as well as inputs before executing the graph * Made NodeNames static and created a new namespace * Added comments to the TensorAppliers * Started adding comments on the TensorGenerators code * Added comments for the Tensor Generator * Moving the helper classes into a separate folder * Added initial comments to the TensorChecks * Renamed NodeNames -> TensorNames * Removing warnings in tests * Now using Aut.../develop-generalizationTraining-TrainerController
GitHub
6 年前
当前提交
6c354d16
共有 31 个文件被更改,包括 1626 次插入 和 458 次删除
-
56UnitySDK/Assets/ML-Agents/Editor/LearningBrainEditor.cs
-
1UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
-
3UnitySDK/Assets/ML-Agents/Editor/Tests/UtilitiesTests.cs
-
20UnitySDK/Assets/ML-Agents/Scripts/Brain.cs
-
1UnitySDK/Assets/ML-Agents/Scripts/HeuristicBrain.cs
-
498UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs.meta
-
2UnitySDK/Assets/ML-Agents/Scripts/PlayerBrain.cs
-
6ml-agents/mlagents/trainers/bc/models.py
-
27ml-agents/mlagents/trainers/models.py
-
8ml-agents/mlagents/trainers/policy.py
-
10ml-agents/mlagents/trainers/ppo/policy.py
-
3ml-agents/mlagents/trainers/ppo/trainer.py
-
138UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs
-
3UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs.meta
-
158UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs
-
11UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs.meta
-
8UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain.meta
-
143UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs.meta
-
224UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs.meta
-
531UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelParamLoader.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelParamLoader.cs.meta
-
80UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs
-
11UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs.meta
-
99UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs.meta
-
25UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorNames.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorNames.cs.meta
|
|||
using System.Collections.Generic; |
|||
using System.Linq; |
|||
using NUnit.Framework; |
|||
using UnityEngine; |
|||
using UnityEngine.MachineLearning.InferenceEngine; |
|||
using UnityEngine.MachineLearning.InferenceEngine.Util; |
|||
using System.Reflection; |
|||
using MLAgents.InferenceBrain; |
|||
|
|||
namespace MLAgents.Tests
{
    /// <summary>
    /// Edit mode tests covering the TensorApplier and the individual Applier
    /// implementations that write inference outputs back onto agents.
    /// </summary>
    public class EditModeTestInternalBrainTensorApplier
    {
        private class TestAgent : Agent
        {
            /// <summary>
            /// Reads the Agent's private "action" field via reflection so the
            /// tests can inspect the values the appliers wrote back.
            /// </summary>
            public AgentAction GetAction()
            {
                FieldInfo f = typeof(Agent).GetField(
                    "action", BindingFlags.Instance | BindingFlags.NonPublic);
                return (AgentAction) f.GetValue(this);
            }
        }

        // Creates two TestAgents with empty AgentInfos; dictionary insertion
        // order (agentA first) is what the per-row assertions below rely on.
        private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
        {
            var goA = new GameObject("goA");
            var agentA = goA.AddComponent<TestAgent>();
            var infoA = new AgentInfo();
            var goB = new GameObject("goB");
            var agentB = goB.AddComponent<TestAgent>();
            var infoB = new AgentInfo();

            return new Dictionary<Agent, AgentInfo>(){{agentA, infoA},{agentB, infoB}};
        }

        [Test]
        public void Construction()
        {
            // Fixed: typo "Contruction" and the misleading "tensorGenerator"
            // name (the object under test is a TensorApplier).
            var bp = new BrainParameters();
            var tensorApplier = new TensorApplier(bp, 0);
            Assert.IsNotNull(tensorApplier);
        }

        [Test]
        public void ApplyContinuousActionOutput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 3},
                Data = new float[,] {{1, 2, 3}, {4, 5, 6}}
            };
            var agentInfos = GetFakeAgentInfos();

            var applier = new ContinuousActionOutputApplier();
            applier.Apply(inputTensor, agentInfos);
            var agents = agentInfos.Keys.ToList();
            // Each agent should receive its own row of the action tensor.
            var agent = agents[0] as TestAgent;
            var action = agent.GetAction();
            Assert.AreEqual(action.vectorActions[0], 1);
            Assert.AreEqual(action.vectorActions[1], 2);
            Assert.AreEqual(action.vectorActions[2], 3);
            agent = agents[1] as TestAgent;
            action = agent.GetAction();
            Assert.AreEqual(action.vectorActions[0], 4);
            Assert.AreEqual(action.vectorActions[1], 5);
            Assert.AreEqual(action.vectorActions[2], 6);
        }

        [Test]
        public void ApplyDiscreteActionOutput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 5},
                Data = new float[,] {{0.5f, 22.5f, 0.1f, 5f, 1f},
                                     {4f, 5f, 6f, 7f, 8f}}
            };
            var agentInfos = GetFakeAgentInfos();

            // Two branches of sizes 2 and 3; the seed of 0 makes the
            // multinomial draws deterministic, so exact values can be asserted.
            var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0);
            applier.Apply(inputTensor, agentInfos);
            var agents = agentInfos.Keys.ToList();
            var agent = agents[0] as TestAgent;
            var action = agent.GetAction();
            Assert.AreEqual(action.vectorActions[0], 1);
            Assert.AreEqual(action.vectorActions[1], 1);
            agent = agents[1] as TestAgent;
            action = agent.GetAction();
            Assert.AreEqual(action.vectorActions[0], 1);
            Assert.AreEqual(action.vectorActions[1], 2);
        }

        [Test]
        public void ApplyMemoryOutput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 5},
                Data = new float[,] {{0.5f, 22.5f, 0.1f, 5f, 1f},
                                     {4f, 5f, 6f, 7f, 8f}}
            };
            var agentInfos = GetFakeAgentInfos();

            var applier = new MemoryOutputApplier();
            applier.Apply(inputTensor, agentInfos);
            var agents = agentInfos.Keys.ToList();
            var agent = agents[0] as TestAgent;
            var action = agent.GetAction();
            Assert.AreEqual(action.memories[0], 0.5f);
            Assert.AreEqual(action.memories[1], 22.5f);
            agent = agents[1] as TestAgent;
            action = agent.GetAction();
            Assert.AreEqual(action.memories[2], 6);
            Assert.AreEqual(action.memories[3], 7);
        }

        [Test]
        public void ApplyValueEstimate()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 1},
                Data = new float[,] {{0.5f}, {8f}}
            };
            var agentInfos = GetFakeAgentInfos();

            var applier = new ValueEstimateApplier();
            applier.Apply(inputTensor, agentInfos);
            var agents = agentInfos.Keys.ToList();
            var agent = agents[0] as TestAgent;
            var action = agent.GetAction();
            Assert.AreEqual(action.value, 0.5f);
            agent = agents[1] as TestAgent;
            action = agent.GetAction();
            Assert.AreEqual(action.value, 8);
        }
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: be419f7ed5c24b24a6f2636d3b107535 |
|||
timeCreated: 1537915674 |
|
|||
using System; |
|||
using System.Collections.Generic; |
|||
using System.Linq; |
|||
using NUnit.Framework; |
|||
using UnityEngine; |
|||
using UnityEngine.MachineLearning.InferenceEngine; |
|||
using UnityEngine.MachineLearning.InferenceEngine.Util; |
|||
using MLAgents.InferenceBrain; |
|||
|
|||
namespace MLAgents.Tests
{
    /// <summary>
    /// Edit mode tests covering the TensorGenerator and the individual
    /// Generator implementations that build model input tensors from AgentInfo.
    /// </summary>
    public class EditModeTestInternalBrainTensorGenerator
    {
        private class TestAgent : Agent
        {

        }

        // Creates two TestAgents with populated AgentInfos. agentA deliberately
        // has null memories/actionMasks to exercise the generators' null paths;
        // dictionary insertion order (agentA first) is what the per-row
        // assertions below rely on.
        private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
        {
            var goA = new GameObject("goA");
            var agentA = goA.AddComponent<TestAgent>();
            var infoA = new AgentInfo()
            {
                stackedVectorObservation = (new float[] {1f, 2f, 3f}).ToList(),
                memories = null,
                storedVectorActions = new float[] {1, 2},
                actionMasks = null,

            };
            var goB = new GameObject("goB");
            var agentB = goB.AddComponent<TestAgent>();
            var infoB = new AgentInfo()
            {
                stackedVectorObservation = (new float[] {4f, 5f, 6f}).ToList(),
                memories = (new float[] {1f, 1f, 1f}).ToList(),
                storedVectorActions = new float[] {3, 4},
                actionMasks = new bool[] {true, false, false, false, false},
            };

            return new Dictionary<Agent, AgentInfo>(){{agentA, infoA},{agentB, infoB}};
        }

        [Test]
        public void Construction()
        {
            // Fixed: typo "Contruction".
            var bp = new BrainParameters();
            var tensorGenerator = new TensorGenerator(bp, 0);
            Assert.IsNotNull(tensorGenerator);
        }

        [Test]
        public void GenerateBatchSize()
        {
            var inputTensor = new Tensor();
            var batchSize = 4;
            var generator = new BatchSizeGenerator();
            generator.Generate(inputTensor, batchSize, null);
            Assert.IsNotNull(inputTensor.Data as int[]);
            Assert.AreEqual((inputTensor.Data as int[])[0], batchSize);
        }

        [Test]
        public void GenerateSequenceLength()
        {
            var inputTensor = new Tensor();
            var batchSize = 4;
            var generator = new SequenceLengthGenerator();
            generator.Generate(inputTensor, batchSize, null);
            Assert.IsNotNull(inputTensor.Data as int[]);
            // Sequence length is always 1 regardless of the batch size.
            Assert.AreEqual((inputTensor.Data as int[])[0], 1);
        }

        [Test]
        public void GenerateVectorObservation()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 3}
            };
            var batchSize = 4;
            var agentInfos = GetFakeAgentInfos();

            var generator = new VectorObservationGenerator();
            generator.Generate(inputTensor, batchSize, agentInfos);
            Assert.IsNotNull(inputTensor.Data as float[,]);
            Assert.AreEqual((inputTensor.Data as float[,])[0, 0], 1);
            Assert.AreEqual((inputTensor.Data as float[,])[0, 2], 3);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 0], 4);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 2], 6);
        }

        [Test]
        public void GenerateRecurrentInput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 5}
            };
            var batchSize = 4;
            var agentInfos = GetFakeAgentInfos();

            var generator = new RecurrentInputGenerator();
            generator.Generate(inputTensor, batchSize, agentInfos);
            Assert.IsNotNull(inputTensor.Data as float[,]);
            // agentA has null memories -> all-zero row; agentB has 3 memories
            // in a width-5 tensor -> trailing entries stay zero.
            Assert.AreEqual((inputTensor.Data as float[,])[0, 0], 0);
            Assert.AreEqual((inputTensor.Data as float[,])[0, 4], 0);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 0], 1);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 4], 0);
        }

        [Test]
        public void GeneratePreviousActionInput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 2},
                ValueType = Tensor.TensorType.FloatingPoint

            };
            var batchSize = 4;
            var agentInfos = GetFakeAgentInfos();

            // Previous actions are only defined for discrete control, so a
            // floating point tensor must be rejected.
            var generator = new PreviousActionInputGenerator();
            Assert.Catch<NotImplementedException>(
                () => generator.Generate(inputTensor, batchSize, agentInfos));

            inputTensor.ValueType = Tensor.TensorType.Integer;
            generator.Generate(inputTensor, batchSize, agentInfos);
            Assert.IsNotNull(inputTensor.Data as int[,]);
            Assert.AreEqual((inputTensor.Data as int[,])[0, 0], 1);
            Assert.AreEqual((inputTensor.Data as int[,])[0, 1], 2);
            Assert.AreEqual((inputTensor.Data as int[,])[1, 0], 3);
            Assert.AreEqual((inputTensor.Data as int[,])[1, 1], 4);
        }

        [Test]
        public void GenerateActionMaskInput()
        {
            var inputTensor = new Tensor()
            {
                Shape = new long[] {2, 5},
                ValueType = Tensor.TensorType.FloatingPoint

            };
            var batchSize = 4;
            var agentInfos = GetFakeAgentInfos();

            var generator = new ActionMaskInputGenerator();
            generator.Generate(inputTensor, batchSize, agentInfos);
            Assert.IsNotNull(inputTensor.Data as float[,]);
            Assert.AreEqual((inputTensor.Data as float[,])[0, 0], 1);
            Assert.AreEqual((inputTensor.Data as float[,])[0, 4], 1);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 0], 0);
            Assert.AreEqual((inputTensor.Data as float[,])[1, 4], 1);
        }
    }
}
|
|||
fileFormatVersion: 2 |
|||
guid: d2d2076c51c414ac7a91f8fbf15d4f7c |
|||
MonoImporter: |
|||
externalObjects: {} |
|||
serializedVersion: 2 |
|||
defaultReferences: [] |
|||
executionOrder: 0 |
|||
icon: {instanceID: 0} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
fileFormatVersion: 2 |
|||
guid: 79c170c0af66140e68d7eca827f0d788 |
|||
folderAsset: yes |
|||
DefaultImporter: |
|||
externalObjects: {} |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
using UnityEngine.MachineLearning.InferenceEngine; |
|||
using System.Collections.Generic; |
|||
using UnityEngine; |
|||
using UnityEngine.MachineLearning.InferenceEngine.Util; |
|||
|
|||
namespace MLAgents.InferenceBrain |
|||
{ |
|||
/// <summary>
|
|||
/// The Applier for the Continuous Action output tensor. Tensor is assumed to contain the
|
|||
/// continuous action data of the agents in the batch.
|
|||
/// </summary>
|
|||
public class ContinuousActionOutputApplier : TensorApplier.Applier |
|||
{ |
|||
public void Apply(Tensor tensor, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
var tensorDataAction = tensor.Data as float[,]; |
|||
var actionSize = tensor.Shape[1]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var action = new float[actionSize]; |
|||
for (var j = 0; j < actionSize; j++) |
|||
{ |
|||
action[j] = tensorDataAction[agentIndex, j]; |
|||
} |
|||
agent.UpdateVectorAction(action); |
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
    /// <summary>
    /// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
    /// actions from the logits contained in the tensor.
    /// </summary>
    public class DiscreteActionOutputApplier : TensorApplier.Applier
    {
        // Number of possible actions for each discrete action branch.
        private int[] _actionSize;
        // Seeded sampler; reusing it across calls keeps draws deterministic per seed.
        private Multinomial _multinomial;

        public DiscreteActionOutputApplier(int[] actionSize, int seed)
        {
            _actionSize = actionSize;
            _multinomial = new Multinomial(seed);
        }

        public void Apply(Tensor tensor, Dictionary<Agent, AgentInfo> agentInfo)
        {
            // Tensor rows are agents (in dictionary enumeration order); columns are
            // the concatenated per-branch values for all branches.
            var tensorDataProbabilities = tensor.Data as float[,];
            var batchSize = agentInfo.Keys.Count;
            var actions = new float[batchSize, _actionSize.Length];
            // Column offset where each branch's slice starts within a row.
            var startActionIndices = Utilities.CumSum(_actionSize);
            for (var actionIndex=0; actionIndex < _actionSize.Length; actionIndex++)
            {
                var nBranchAction = _actionSize[actionIndex];
                // Extract this branch's slice for the whole batch.
                var actionProbs = new float[batchSize, nBranchAction];
                for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
                {
                    for (var branchActionIndex = 0;
                        branchActionIndex < nBranchAction;
                        branchActionIndex++)
                    {
                        actionProbs[batchIndex, branchActionIndex] =
                            tensorDataProbabilities[
                                batchIndex, startActionIndices[actionIndex] + branchActionIndex];
                    }
                }
                // Wrap the slice in a temporary Tensor pair and sample one action
                // per agent for this branch.
                var inputTensor = new Tensor()
                {
                    ValueType = Tensor.TensorType.FloatingPoint,
                    Shape = new long[]{batchSize, _actionSize[actionIndex]},
                    Data = actionProbs
                };
                var outputTensor = new Tensor()
                {
                    ValueType = Tensor.TensorType.FloatingPoint,
                    Shape = new long[]{batchSize, 1},
                    Data = new float[batchSize, 1]
                };
                _multinomial.Eval(inputTensor, outputTensor);
                var outTensor = outputTensor.Data as float[,];
                for (var ii = 0; ii < batchSize; ii++)
                {
                    actions[ii, actionIndex] = outTensor[ii, 0];
                }
            }
            // Write each agent's sampled branch actions back onto the agent.
            var agentIndex = 0;
            foreach (var agent in agentInfo.Keys)
            {
                var action = new float[_actionSize.Length];
                for (var j = 0; j < _actionSize.Length; j++)
                {
                    action[j] = actions[agentIndex, j];
                }
                agent.UpdateVectorAction(action);
                agentIndex++;
            }
        }
    }
|||
|
|||
/// <summary>
|
|||
/// The Applier for the Memory output tensor. Tensor is assumed to contain the new
|
|||
/// memory data of the agents in the batch.
|
|||
/// </summary>
|
|||
public class MemoryOutputApplier : TensorApplier.Applier |
|||
{ |
|||
public void Apply(Tensor tensor, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
var tensorDataMemory = tensor.Data as float[,]; |
|||
var agentIndex = 0; |
|||
var memorySize = tensor.Shape[1]; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var memory = new List<float>(); |
|||
for (var j = 0; j < memorySize; j++) |
|||
{ |
|||
memory.Add(tensorDataMemory[agentIndex, j]); |
|||
} |
|||
|
|||
agent.UpdateMemoriesAction(memory); |
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// The Applier for the Value Estimate output tensor. Tensor is assumed to contain the
|
|||
/// value estimates of the agents in the batch.
|
|||
/// </summary>
|
|||
public class ValueEstimateApplier : TensorApplier.Applier |
|||
{ |
|||
public void Apply(Tensor tensor, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
var tensorDataValue = tensor.Data as float[,]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
agent.UpdateValueAction(tensorDataValue[agentIndex, 0]); |
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
} |
|
|||
fileFormatVersion: 2 |
|||
guid: 99d5dc2d52e442d1a1f466a246cfb28d |
|||
timeCreated: 1539118675 |
|
|||
using UnityEngine.MachineLearning.InferenceEngine; |
|||
using System.Collections.Generic; |
|||
using System; |
|||
using UnityEngine.MachineLearning.InferenceEngine.Util; |
|||
using System.Linq; |
|||
|
|||
namespace MLAgents.InferenceBrain |
|||
{ |
|||
/// <summary>
|
|||
/// Reshapes a Tensor so that its first dimension becomes equal to the current batch size
|
|||
/// and initializes its content to be zeros. Will only work on 2-dimensional tensors.
|
|||
/// The second dimension of the Tensor will not be modified.
|
|||
/// </summary>
|
|||
public class BiDimensionalOutputGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
var shapeSecondAxis = tensor.Shape[1]; |
|||
tensor.Shape[0] = batchSize; |
|||
if (tensor.ValueType == Tensor.TensorType.FloatingPoint) |
|||
{ |
|||
tensor.Data = new float[batchSize, shapeSecondAxis]; |
|||
} |
|||
else |
|||
{ |
|||
tensor.Data = new int[batchSize, shapeSecondAxis]; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the BatchSize input : Will be a one dimensional
|
|||
/// integer array of size 1 containing the batch size.
|
|||
/// </summary>
|
|||
public class BatchSizeGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
tensor.Data = new int[] {batchSize}; |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the SequenceLength input : Will be a one
|
|||
/// dimensional integer array of size 1 containing 1.
|
|||
/// Note : the sequence length is always one since recurrent networks only predict for
|
|||
/// one step at the time.
|
|||
/// </summary>
|
|||
public class SequenceLengthGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
tensor.Data = new int[] {1}; |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the VectorObservation input : Will be a two
|
|||
/// dimensional float array of dimension [batchSize x vectorObservationSize].
|
|||
/// It will use the Vector Observation data contained in the agentInfo to fill the data
|
|||
/// of the tensor.
|
|||
/// </summary>
|
|||
public class VectorObservationGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
tensor.Shape[0] = batchSize; |
|||
var vecObsSizeT = tensor.Shape[1]; |
|||
tensor.Data = new float[batchSize, vecObsSizeT]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var vectorObs = agentInfo[agent].stackedVectorObservation; |
|||
for (var j = 0; j < vecObsSizeT; j++) |
|||
{ |
|||
tensor.Data.SetValue(vectorObs[j], new int[2] {agentIndex, j}); |
|||
} |
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the Recurrent input : Will be a two
|
|||
/// dimensional float array of dimension [batchSize x memorySize].
|
|||
/// It will use the Memory data contained in the agentInfo to fill the data
|
|||
/// of the tensor.
|
|||
/// </summary>
|
|||
public class RecurrentInputGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
tensor.Shape[0] = batchSize; |
|||
var memorySize = tensor.Shape[1]; |
|||
tensor.Data = new float[batchSize, memorySize]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var memory = agentInfo[agent].memories; |
|||
if (memory == null) |
|||
{ |
|||
agentIndex++; |
|||
continue; |
|||
} |
|||
for (var j = 0; j < Math.Min(memorySize, memory.Count); j++) |
|||
{ |
|||
if (j >= memory.Count) |
|||
{ |
|||
break; |
|||
} |
|||
tensor.Data.SetValue(memory[j], new int[2] {agentIndex, j}); |
|||
} |
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the Previous Action input : Will be a two
|
|||
/// dimensional integer array of dimension [batchSize x actionSize].
|
|||
/// It will use the previous action data contained in the agentInfo to fill the data
|
|||
/// of the tensor.
|
|||
/// </summary>
|
|||
public class PreviousActionInputGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
if (tensor.ValueType != Tensor.TensorType.Integer) |
|||
{ |
|||
throw new NotImplementedException( |
|||
"Previous Action Inputs are only valid for discrete control"); |
|||
} |
|||
|
|||
tensor.Shape[0] = batchSize; |
|||
var actionSize = tensor.Shape[1]; |
|||
tensor.Data = new int[batchSize, actionSize]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var pastAction = agentInfo[agent].storedVectorActions; |
|||
for (var j = 0; j < actionSize; j++) |
|||
{ |
|||
tensor.Data.SetValue((int) pastAction[j], new int[2] {agentIndex, j}); |
|||
} |
|||
|
|||
agentIndex++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// <summary>
|
|||
/// Generates the Tensor corresponding to the Action Mask input : Will be a two
|
|||
/// dimensional float array of dimension [batchSize x numActionLogits].
|
|||
/// It will use the Action Mask data contained in the agentInfo to fill the data
|
|||
/// of the tensor.
|
|||
/// </summary>
|
|||
public class ActionMaskInputGenerator : TensorGenerator.Generator |
|||
{ |
|||
public void Generate(Tensor tensor, int batchSize, Dictionary<Agent, AgentInfo> agentInfo) |
|||
{ |
|||
tensor.Shape[0] = batchSize; |
|||
var maskSize = tensor.Shape[1]; |
|||
tensor.Data = new float[batchSize, maskSize]; |
|||
var agentIndex = 0; |
|||
foreach (var agent in agentInfo.Keys) |
|||
{ |
|||
var maskList = agentInfo[agent].actionMasks; |
|||
for (var j = 0; j < maskSize; j++) |
|||
|