浏览代码

Moving the Tensor Applier around (#5185)

Co-authored-by: Ruo-Ping Dong <ruoping.dong@unity3d.com>
/ai-hw-2021
GitHub 4 年前
当前提交
00fc501e
共有 8 个文件被更改,包括 158 次插入和 124 次删除
  1. 2
      Project/Packages/manifest.json
  2. 93
      com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  3. 18
      com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
  4. 3
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  5. 17
      com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs
  6. 8
      com.unity.ml-agents/Runtime/Training.meta
  7. 130
      com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs
  8. 11
      com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs.meta

2
Project/Packages/manifest.json


"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "3.6.1",
"com.unity.collab-proxy": "1.3.9",
"com.unity.ide.rider": "2.0.7",
"com.unity.ide.visualstudio": "2.0.7",
"com.unity.ide.vscode": "1.2.3",

"com.unity.nuget.newtonsoft-json": "2.0.0",
"com.unity.test-framework": "1.1.22",
"com.unity.textmeshpro": "3.0.1",
"com.unity.timeline": "1.4.6",
"com.unity.ugui": "1.0.0",
"com.unity.xr.legacyinputhelpers": "2.1.7",

93
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


}
}
/// <summary>
/// Applier that selects, for each agent, the argmax of the model's output logits
/// and writes it into the first discrete action slot.
/// </summary>
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes the argmax action for each listed agent into <paramref name="lastActions"/>.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the chosen action.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var discreteBuffer = actionBuffer.DiscreteActions;
                // Argmax over the logits. Fixes two defects in the previous version:
                // maxValue was never updated inside the loop (so maxIndex tracked the
                // last positive entry, not the maximum), and casting logits to int
                // truncated fractional/negative values.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                discreteBuffer[0] = maxIndex;
            }
            // Note: the batch row advances even for agents missing from lastActions,
            // keeping tensor rows aligned with actionIds. Preserved from the original.
            agentIndex++;
        }
    }
}
/// <summary>
/// Applier that converts a discretized action output back into two continuous actions.
/// The argmax over the logits is interpreted as a flattened (row, col) index on an
/// m_NumDiscretization x m_NumDiscretization grid, and each axis is mapped to [-1, 1].
/// </summary>
internal class ContinuousFromDiscreteOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;
    readonly int m_NumDiscretization;

    public ContinuousFromDiscreteOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator, int numDiscretization)
    {
        m_ActionSpec = actionSpec;
        m_NumDiscretization = numDiscretization;
    }

    /// <summary>
    /// Writes two continuous actions, decoded from the argmax bin, for each listed agent.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the (discretized) action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the decoded actions.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var continuousBuffer = actionBuffer.ContinuousActions;
                // Argmax over the logits. Fixes the previous version, which never
                // updated maxValue and truncated the logits to int.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                // Decode the flattened bin index into (row, col) and map each axis from
                // [0, N-1] to [-1, 1]. The previous expression used pure integer
                // arithmetic with "/2" instead of "*2", which collapsed nearly every
                // result to a constant.
                // NOTE(review): assumes the trainer discretizes each axis uniformly over
                // [-1, 1] — confirm against the discretization on the training side.
                var row = maxIndex / m_NumDiscretization;
                var col = maxIndex % m_NumDiscretization;
                continuousBuffer[0] = 2f * row / (m_NumDiscretization - 1) - 1f;
                continuousBuffer[1] = 2f * col / (m_NumDiscretization - 1) - 1f;
            }
            agentIndex++;
        }
    }
}
/// <summary>
/// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
/// actions from the logits contained in the tensor.

18
com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs


return names.ToArray();
}
/// <summary>
/// Returns the sorted names of the tensors produced by the training graph,
/// or an empty array when no model is loaded.
/// </summary>
/// <param name="model">The Barracuda model to inspect; may be null.</param>
/// <returns>Sorted array of training output tensor names.</returns>
public static string[] GetTrainingOutputNames(this Model model)
{
    if (model == null)
    {
        return new string[0];
    }
    // NOTE(review): "OuputLoss" is the constant's actual (misspelled) name upstream.
    var names = new List<string>
    {
        TensorNames.TrainingStateOut,
        TensorNames.OuputLoss,
        TensorNames.TrainingOutput,
    };
    names.Sort();
    return names.ToArray();
}
/// <summary>
/// Check if the model has continuous action outputs.
/// </summary>

3
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


}
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
// m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
m_Dict[tensorName] = new MaxActionOutputApplier(actionSpec, seed, allocator);
m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
}
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);

17
com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs


ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TrainingTensorGenerator m_TrainingTensorGenerator;
TensorApplier m_TensorApplier;
TrainingForwardTensorApplier m_TensorApplier;
string[] m_TrainingOutputNames;
IReadOnlyList<TensorProxy> m_TrainingInputs;
IReadOnlyList<TensorProxy> m_InferenceInputs;
List<TensorProxy> m_TrainingOutputs;

seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TrainingTensorGenerator = new TrainingTensorGenerator(
seed, m_TensorAllocator, config.learningRate, config.gamma, barracudaModel);
m_TensorApplier = new TensorApplier(
actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TrainingForwardTensorApplier(
actionSpec, seed, m_TensorAllocator, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_TrainingOutputs = new List<TensorProxy>();
m_Buffer = buffer;

void InitializeTrainingState()
{
var initState = m_Model.GetTensorByName(TensorNames.InitialTrainingState);
m_TrainingState = new TensorProxy{
m_TrainingState = new TensorProxy
{
name = TensorNames.InitialTrainingState,
valueType = TensorProxy.TensorType.FloatingPoint,
data = initState,

// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_TrainingOutputNames);
FetchBarracudaOutputs(new string[] { TensorNames.TrainingOutput });
// Update the outputs
m_TensorApplier.ApplyTensors(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);

// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_TrainingOutputNames);
// m_TensorApplier.UpdateModel(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
m_TrainingState = m_TrainingOutputs[0];
}
public ActionBuffers GetAction(int agentId)

8
com.unity.ml-agents/Runtime/Training.meta


fileFormatVersion: 2
guid: 676cc58c5738749bf836e799a89c7c94
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

130
com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
/// <summary>
/// Mapping between the output tensor names and the method that will use the
/// output tensors and the Agents present in the batch to update their action, memories and
/// value estimates.
/// A TensorApplier implements a Dictionary of strings (node names) to an Action.
/// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
/// the current batch.
/// </summary>
/// <summary>
/// Mapping between output tensor names and the appliers that use those tensors to
/// update agent actions during a training forward pass. Only supports a single
/// discrete action branch; the sole registered applier consumes
/// TensorNames.TrainingOutput.
/// </summary>
internal class TrainingForwardTensorApplier
{
    readonly Dictionary<string, TensorApplier.IApplier> m_Dict = new Dictionary<string, TensorApplier.IApplier>();

    /// <summary>
    /// Returns a new TrainingForwardTensorApplier object.
    /// </summary>
    /// <param name="actionSpec"> Description of the actions for the Agent.</param>
    /// <param name="seed"> The seed the Appliers will be initialized with.</param>
    /// <param name="allocator"> Tensor allocator</param>
    /// <param name="barracudaModel">The Barracuda model; when null, no appliers are registered.</param>
    /// <exception cref="System.Exception">Thrown when the action spec has continuous
    /// actions or more than one discrete branch.</exception>
    public TrainingForwardTensorApplier(
        ActionSpec actionSpec,
        int seed,
        ITensorAllocator allocator,
        object barracudaModel = null)
    {
        // If model is null, no inference to run and exception is thrown before reaching here.
        if (barracudaModel == null)
        {
            return;
        }
        if (actionSpec.NumContinuousActions > 0)
        {
            throw new System.Exception("Cannot do continuous actions");
        }
        if (actionSpec.NumDiscreteActions != 1)
        {
            throw new System.Exception("Cannot do multi discrete actions, only single discrete");
        }
        // The previous version cast barracudaModel to Model into an unused local;
        // the applier below does not need the model, so the dead assignment is removed.
        m_Dict[TensorNames.TrainingOutput] = new MaxActionOutputApplier(actionSpec, seed, allocator);
    }

    /// <summary>
    /// Updates the state of the agents based on the data present in the tensor.
    /// </summary>
    /// <param name="tensors"> Enumerable of tensors containing the data.</param>
    /// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
    /// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
    /// <exception cref="UnityAgentsException"> One of the tensor does not have an
    /// associated applier.</exception>
    public void ApplyTensors(
        IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
        {
            var tensor = tensors[tensorIndex];
            if (!m_Dict.ContainsKey(tensor.name))
            {
                throw new UnityAgentsException(
                    $"Unknown tensorProxy expected as output : {tensor.name}");
            }
            m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
        }
    }
}
/// <summary>
/// Applier that selects, for each agent, the argmax of the model's output logits
/// and writes it into the first discrete action slot.
/// NOTE(review): this duplicates the MaxActionOutputApplier added to ApplierImpl.cs
/// in the same namespace in this commit — that is a compile conflict; confirm one
/// of the two definitions is removed.
/// </summary>
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes the argmax action for each listed agent into <paramref name="lastActions"/>.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the chosen action.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var discreteBuffer = actionBuffer.DiscreteActions;
                // Argmax over the logits. Fixes two defects in the previous version:
                // maxValue was never updated inside the loop (so maxIndex tracked the
                // last positive entry, not the maximum), and casting logits to int
                // truncated fractional/negative values.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                discreteBuffer[0] = maxIndex;
            }
            agentIndex++;
        }
    }
}
}

11
com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs.meta


fileFormatVersion: 2
guid: a2677467266ab48cfb01c5d873d043a9
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存