Compare commits

...
This merge request contains changes that conflict with the target branch.
/Project/Packages/manifest.json
/Project/ProjectSettings/ProjectVersion.txt
/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
/com.unity.ml-agents/Runtime/Academy.cs
/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs

1 commit

Author SHA1 Message Commit date
vincentpierre a9ca4a7d Moving the tensor applier arround 4 years ago
Showing 23 changed files, with 1,106 additions and 35 deletions
  1. Project/Packages/manifest.json (9 changes)
  2. Project/ProjectSettings/ProjectVersion.txt (4 changes)
  3. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (41 changes)
  4. com.unity.ml-agents/Runtime/Academy.cs (14 changes)
  5. com.unity.ml-agents/Runtime/Inference/TensorProxy.cs (18 changes)
  6. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (3 changes)
  7. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (1 change)
  8. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (21 changes)
  9. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (1 change)
  10. com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs (59 changes)
  11. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (9 changes)
  12. com.unity.ml-agents/Runtime/ReplayBuffer.cs.meta (11 changes)
  13. com.unity.ml-agents/Runtime/Trainer.cs.meta (11 changes)
  14. com.unity.ml-agents/Runtime/ReplayBuffer.cs (87 changes)
  15. com.unity.ml-agents/Runtime/Trainer.cs (79 changes)
  16. com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs (267 changes)
  17. com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs.meta (11 changes)
  18. com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs (130 changes)
  19. com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs.meta (11 changes)
  20. com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs.meta (11 changes)
  21. com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs (84 changes)
  22. com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs.meta (11 changes)
  23. com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs (248 changes)

Project/Packages/manifest.json (9 changes)


"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "3.6.1",
"com.unity.collab-proxy": "1.2.16",
"com.unity.ide.rider": "1.1.4",
"com.unity.collab-proxy": "1.3.9",
"com.unity.ide.rider": "2.0.7",
"com.unity.ide.visualstudio": "2.0.7",
"com.unity.ide.vscode": "1.2.3",
"com.unity.ml-agents": "file:../../com.unity.ml-agents",
"com.unity.ml-agents.extensions": "file:../../com.unity.ml-agents.extensions",

"com.unity.textmeshpro": "2.0.1",
"com.unity.timeline": "1.2.6",
"com.unity.textmeshpro": "3.0.1",
"com.unity.timeline": "1.4.6",
"com.unity.ugui": "1.0.0",
"com.unity.xr.legacyinputhelpers": "2.1.7",
"com.unity.modules.ai": "1.0.0",

Project/ProjectSettings/ProjectVersion.txt (4 changes)


- m_EditorVersion: 2019.4.20f1
- m_EditorVersionWithRevision: 2019.4.20f1 (6dd1c08eedfa)
+ m_EditorVersion: 2020.3.0f1
+ m_EditorVersionWithRevision: 2020.3.0f1 (c7b5465681fb)

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (41 changes)


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1036225416237908}
- m_Material: {fileID: 13400000, guid: 56162663048874fd4b10e065f9cf78b7, type: 2}
+ m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!54 &54597526346971362
Rigidbody:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1321468028730240
GameObject:
m_ObjectHideFlags: 0

VectorObservationSize: 8
NumStackedVectorObservations: 1
m_ActionSpec:
- m_NumContinuousActions: 2
- BranchSizes:
- VectorActionSize: 02000000
+ m_NumContinuousActions: 0
+ BranchSizes: 0a0000000a000000
+ VectorActionSize: 0a0000000a000000
- VectorActionSpaceType: 1
+ VectorActionSpaceType: 0
- m_Model: {fileID: 11400000, guid: 20a7b83be6b0c493d9271c65c897eb9b, type: 3}
+ m_Model: {fileID: 5022602860645237092, guid: 35d5202e6dbc04a50934f20df199b47f, type: 3}
- m_BehaviorType: 0
+ m_BehaviorType: 3
m_BehaviorName: 3DBall
TeamId: 0
m_UseChildSensors: 1

m_ClearFlags: 2
m_BackGroundColor: {r: 0.46666667, g: 0.5647059, b: 0.60784316, a: 1}
m_projectionMatrixMode: 1
- m_GateFitMode: 2
+ m_FOVAxisMode: 0
+ m_GateFitMode: 2
m_FocalLength: 50
m_NormalizedViewPortRect:
serializedVersion: 2

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1854695166504686
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1859240399150782
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1999020414315134
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}

com.unity.ml-agents/Runtime/Academy.cs (14 changes)


bool m_Initialized;
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
List<Trainer> m_Trainers = new List<Trainer>();
// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;

// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
internal event Action TrainerUpdate;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.

{
AgentAct?.Invoke();
}
TrainerUpdate?.Invoke();
}
}

m_InferenceSeed++;
}
return modelRunner;
}
internal Trainer GetOrCreateTrainer(string behaviorName, ActionSpec actionSpec, NNModel model)
{
var trainer = m_Trainers.Find(x => x.BehaviorName == behaviorName);
if (trainer == null)
{
trainer = new Trainer(behaviorName, actionSpec, model);
m_Trainers.Add(trainer);
}
return trainer;
}
/// <summary>

com.unity.ml-agents/Runtime/Inference/TensorProxy.cs (18 changes)


using System;
using System.Linq;
using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Inference.Utils;

for (var i = 0; i < tensorProxy.data.length; i++)
{
tensorProxy.data[i] = (float)randomNormal.NextDouble();
}
}
public static void CopyTensor(TensorProxy source, TensorProxy target)
{
for (var b = 0; b < source.data.batch; b++)
{
for (var i = 0; i < source.data.height; i++)
{
for (var j = 0; j < source.data.width; j++)
{
for(var k = 0; k < source.data.channels; k++)
{
target.data[b, i, j, k] = source.data[b, i, j, k];
}
}
}
}
}
}

com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (3 changes)


}
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
- m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
+ // m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
+ m_Dict[tensorName] = new MaxActionOutputApplier(actionSpec, seed, allocator);
}
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);
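
Note: the hunk above swaps the stochastic DiscreteActionOutputApplier (which, per the ApplierImpl.cs doc comment further down, samples discrete actions from the logits with a multinomial) for a greedy MaxActionOutputApplier. For contrast, a minimal standalone sketch of the two selection rules over one agent's logits (illustrative only, not the package's implementation):

using System;
using System.Linq;

static class ActionSelection
{
    // Greedy: index of the largest logit (what MaxActionOutputApplier computes).
    public static int ArgMax(float[] logits)
    {
        var best = 0;
        for (var i = 1; i < logits.Length; i++)
        {
            if (logits[i] > logits[best]) best = i;
        }
        return best;
    }

    // Multinomial: index sampled with probability softmax(logits).
    public static int Sample(float[] logits, Random rng)
    {
        var max = logits.Max();
        var exps = logits.Select(l => Math.Exp(l - max)).ToArray();
        var r = rng.NextDouble() * exps.Sum();
        var acc = 0.0;
        for (var i = 0; i < exps.Length; i++)
        {
            acc += exps[i];
            if (r <= acc) return i;
        }
        return exps.Length - 1;
    }
}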

com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (1 change)


using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
using UnityEngine;
namespace Unity.MLAgents.Inference
{

com.unity.ml-agents/Runtime/Inference/TensorNames.cs (21 changes)


using System.Collections.Generic;
using System.Linq;
using System;
namespace Unity.MLAgents.Inference
{
/// <summary>

// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputDeprecated = "action_";
// Tensors for in-editor training
public const string Observations = "input";
public const string ActionInput = "action";
public const string RewardInput = "reward";
public const string DoneInput = "done";
public const string Gamma = "gamma";
public const string NextObservations = "next_state";
public const string LearningRate = "lr";
public const string TrainingStateIn = "training_state.1";
public const string TrainingOutput = "output";
public const string OuputLoss = "loss";
public const string TrainingStateOut = "training_state";
public const string InitialTrainingState = "initial_training_state";
/// <summary>
/// Returns the name of the visual observation with a given index

com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (1 change)


}
}
/// <summary>
/// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
/// actions from the logits contained in the tensor.

com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs (59 changes)


using System.Linq;
using Unity.Barracuda;
using FailedCheck = Unity.MLAgents.Inference.BarracudaModelParamLoader.FailedCheck;
using UnityEngine;
namespace Unity.MLAgents.Inference
{

/// <returns>The api version of the model</returns>
public static int GetVersion(this Model model)
{
- return (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
+ // return (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
+ return 3;
}
/// <summary>

valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
tensors.Sort((el1, el2) => el1.name.CompareTo(el2.name));
return tensors;
}
public static IReadOnlyList<TensorProxy> GetTrainingInputTensors(this Model model)
{
var tensors = new List<TensorProxy>();
if (model == null)
return tensors;
foreach (var input in model.inputs)
{
tensors.Add(new TensorProxy
{
name = input.name,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = input.shape.Select(i => (long)i).ToArray()
});
}

public static bool CheckExpectedTensors(this Model model, List<FailedCheck> failedModelChecks)
{
// Check the presence of model version
- var modelApiVersionTensor = model.GetTensorByName(TensorNames.VersionNumber);
- if (modelApiVersionTensor == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning($"Required constant \"{TensorNames.VersionNumber}\" was not found in the model file.")
- );
- return false;
- }
+ // var modelApiVersionTensor = model.GetTensorByName(TensorNames.VersionNumber);
+ // if (modelApiVersionTensor == null)
+ // {
+ // failedModelChecks.Add(
+ // FailedCheck.Warning($"Required constant \"{TensorNames.VersionNumber}\" was not found in the model file.")
+ // );
+ // return false;
+ // }
- var memorySizeTensor = model.GetTensorByName(TensorNames.MemorySize);
- if (memorySizeTensor == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning($"Required constant \"{TensorNames.MemorySize}\" was not found in the model file.")
- );
- return false;
- }
+ // var memorySizeTensor = model.GetTensorByName(TensorNames.MemorySize);
+ // if (memorySizeTensor == null)
+ // {
+ // failedModelChecks.Add(
+ // FailedCheck.Warning($"Required constant \"{TensorNames.MemorySize}\" was not found in the model file.")
+ // );
+ // return false;
+ // }
// Check the presence of action output tensor
if (!model.outputs.Contains(TensorNames.ActionOutputDeprecated) &&

com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (9 changes)


/// The Agent will always use inference with the provided
/// neural network model.
/// </summary>
- InferenceOnly
+ InferenceOnly,
+ /// <summary>
+ /// C# training
+ /// </summary>
+ InEditorTraining
}
/// <summary>

}
return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName);
}
case BehaviorType.InEditorTraining:
return new TrainingPolicy(actionSpec, m_BehaviorName, m_Model);
case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)
{

com.unity.ml-agents/Runtime/ReplayBuffer.cs.meta (11 changes)


fileFormatVersion: 2
guid: be3c5834a200742ed983cd073dd69f9a
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Trainer.cs.meta (11 changes)


fileFormatVersion: 2
guid: 8dd9e7f1621bd487998fd883b2518733
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/ReplayBuffer.cs (87 changes)


// Buffer for C# training
using System;
using System.Linq;
using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
namespace Unity.MLAgents
{
internal struct Transition
{
public IReadOnlyList<TensorProxy> state;
public ActionBuffers action;
public float reward;
public bool done;
public IReadOnlyList<TensorProxy> nextState;
}
internal class ReplayBuffer
{
List<Transition> m_Buffer;
int m_CurrentIndex;
int m_MaxSize;
public ReplayBuffer(int maxSize)
{
m_Buffer = new List<Transition>();
m_Buffer.Capacity = maxSize;
m_MaxSize = maxSize;
}
public int Count
{
get => m_Buffer.Count;
}
public void Push(AgentInfo info, IReadOnlyList<TensorProxy> state, IReadOnlyList<TensorProxy> nextState)
{
if (m_Buffer.Count < m_MaxSize)
{
m_Buffer.Add(new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState});
}
else
{
m_Buffer[m_CurrentIndex] = new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState};
}
m_CurrentIndex += 1;
m_CurrentIndex = m_CurrentIndex % m_MaxSize;
}
public List<Transition> SampleBatch(int batchSize)
{
var indexList = SampleIndex(batchSize);
var samples = new List<Transition>(batchSize);
for (var i = 0; i < batchSize; i++)
{
samples.Add(m_Buffer[indexList[i]]);
}
return samples;
}
public List<Transition> SampleDummyBatch(int batchSize)
{
var indexList = SampleIndex(batchSize);
var samples = new List<Transition>(batchSize);
for (var i = 0; i < batchSize; i++)
{
samples.Add(m_Buffer[m_CurrentIndex-1]);
}
return samples;
}
private List<int> SampleIndex(int batchSize)
{
Random random = new Random();
HashSet<int> index = new HashSet<int>();
while (index.Count < batchSize)
{
index.Add(random.Next(m_Buffer.Count));
}
return index.ToList();
}
}
}
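
ReplayBuffer is a fixed-capacity ring buffer with uniform random sampling: Push appends until maxSize is reached, then overwrites the oldest slot. The same pattern in a self-contained generic sketch (illustrative only; the real class stores Transition structs and is driven by TrainingPolicy further down):

using System;
using System.Collections.Generic;
using System.Linq;

class RingBuffer<T>
{
    readonly List<T> m_Items = new List<T>();
    readonly int m_MaxSize;
    int m_Next;

    public RingBuffer(int maxSize) { m_MaxSize = maxSize; }

    public int Count => m_Items.Count;

    public void Push(T item)
    {
        if (m_Items.Count < m_MaxSize)
            m_Items.Add(item);
        else
            m_Items[m_Next] = item; // overwrite the oldest entry
        m_Next = (m_Next + 1) % m_MaxSize;
    }

    // Uniform sampling without replacement; batchSize must not exceed Count.
    public List<T> Sample(int batchSize, Random rng)
    {
        var picked = new HashSet<int>();
        while (picked.Count < batchSize)
            picked.Add(rng.Next(m_Items.Count));
        return picked.Select(i => m_Items[i]).ToList();
    }
}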

com.unity.ml-agents/Runtime/Trainer.cs (79 changes)


// Trainer for C# training. One trainer per behavior.
using System;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;
namespace Unity.MLAgents
{
internal class TrainerConfig
{
public int bufferSize = 100;
public int batchSize = 4;
public float gamma = 0.99f;
public float learningRate = 0.0005f;
public int updateTargetFreq = 200;
}
internal class Trainer: IDisposable
{
ReplayBuffer m_Buffer;
TrainingModelRunner m_ModelRunner;
TrainingModelRunner m_TargetModelRunner;
string m_behaviorName;
TrainerConfig m_Config;
int m_TrainingStep;
public Trainer(string behaviorName, ActionSpec actionSpec, NNModel model, int seed=0, TrainerConfig config=null)
{
m_Config = config ?? new TrainerConfig();
m_behaviorName = behaviorName;
m_Buffer = new ReplayBuffer(m_Config.bufferSize);
m_ModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
m_TargetModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
// copy weights from model to target model
// m_TargetModelRunner.model.weights = m_ModelRunner.model.weights
Academy.Instance.TrainerUpdate += Update;
}
public string BehaviorName
{
get => m_behaviorName;
}
public ReplayBuffer Buffer
{
get => m_Buffer;
}
public TrainingModelRunner TrainerModelRunner
{
get => m_ModelRunner;
}
public void Dispose()
{
Academy.Instance.TrainerUpdate -= Update;
}
public void Update()
{
if (m_Buffer.Count < m_Config.batchSize * 2)
{
return;
}
var samples = m_Buffer.SampleBatch(m_Config.batchSize);
m_ModelRunner.UpdateModel(samples);
// Update target network
if (m_TrainingStep % m_Config.updateTargetFreq == 0)
{
// copy weights
}
m_TrainingStep += 1;
}
}
}
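
Trainer.Update() samples a batch once the buffer holds 2 * batchSize transitions, and leaves the periodic target-network copy as a TODO. In a standard DQN setup that missing step is either a hard copy every updateTargetFreq steps or a soft (Polyak) update; a sketch over plain weight arrays (an assumed representation; Barracuda does not expose model weights this way out of the box):

using System;

static class TargetNetwork
{
    // Hard update: target <- online.
    public static void HardUpdate(float[][] online, float[][] target)
    {
        for (var i = 0; i < online.Length; i++)
        {
            Array.Copy(online[i], target[i], online[i].Length);
        }
    }

    // Soft update: target <- tau * online + (1 - tau) * target.
    public static void SoftUpdate(float[][] online, float[][] target, float tau)
    {
        for (var i = 0; i < online.Length; i++)
        {
            for (var j = 0; j < online[i].Length; j++)
            {
                target[i][j] = tau * online[i][j] + (1f - tau) * target[i][j];
            }
        }
    }
}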

com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs (267 changes)


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Sensors;
using Unity.MLAgents;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
internal class TrainingTensorGenerator
{
public interface ITrainingGenerator
{
void Generate(
TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState);
}
readonly Dictionary<string, ITrainingGenerator> m_Dict = new Dictionary<string, ITrainingGenerator>();
public TrainingTensorGenerator(
int seed,
ITensorAllocator allocator,
float learning_rate,
float gamma,
object barracudaModel = null
)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
// Generator for Inputs
var obsGen = new CopyObservationTensorsGenerator(allocator);
obsGen.SetSensorIndex(0);
m_Dict[TensorNames.Observations] = obsGen;
var nextObsGen = new CopyNextObservationTensorsGenerator(allocator);
nextObsGen.SetSensorIndex(0);
m_Dict[TensorNames.NextObservations] = nextObsGen;
m_Dict[TensorNames.ActionInput] = new ActionInputGenerator(allocator);
m_Dict[TensorNames.RewardInput] = new RewardInputGenerator(allocator);
m_Dict[TensorNames.DoneInput] = new DoneInputGenerator(allocator);
m_Dict[TensorNames.LearningRate] = new ConstantGenerator(allocator,learning_rate);
m_Dict[TensorNames.Gamma] = new ConstantGenerator(allocator, gamma);
m_Dict[TensorNames.BatchSizePlaceholder] = new TrainingBatchSizeGenerator(allocator);
m_Dict[TensorNames.TrainingStateIn] = new TrainingStateGenerator(allocator);
}
/// <summary>
/// Populates the data of the tensor inputs given the data contained in the current batch
/// of transitions.
/// </summary>
/// <param name="tensors"> Enumerable of tensors that will be modified.</param>
/// <param name="currentBatchSize"> The number of elements in the current batch.
/// </param>
/// <param name="transitions"> List of transitions containing the data used to
/// fill the tensors.</param>
/// <param name="trainingState"> The model's recurrent training-state tensor.</param>
/// <param name="training"> Whether the tensors are generated for a training pass.</param>
/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated generator.</exception>
public void GenerateTensors(
IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<Transition> transitions, TensorProxy trainingState, bool training=false)
{
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
$"Unknown tensorProxy expected as input : {tensor.name}");
}
if ((tensor.name == TensorNames.Observations || tensor.name == TensorNames.BatchSizePlaceholder) && training == false)
{
continue;
}
m_Dict[tensor.name].Generate(tensor, currentBatchSize, transitions, trainingState);
}
}
public static void CopyTensorToBatch(TensorProxy source, TensorProxy target, int batchIndex)
{
for (var i = 0; i < source.Height; i++)
{
for (var j = 0; j < source.Width; j++)
{
for(var k = 0; k < source.Channels; k++)
{
target.data[batchIndex, i, j, k] = source.data[0, i, j, k];
}
}
}
}
}
internal class ActionInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public ActionInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
var actions = transitions[index].action.DiscreteActions;
for (var j = 0; j < actions.Length; j++)
{
tensorProxy.data[index, j] = actions[j];
}
}
}
}
internal class RewardInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public RewardInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
tensorProxy.data[index, 0] = transitions[index].reward;
}
}
}
internal class DoneInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public DoneInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
tensorProxy.data[index, 0] = transitions[index].done ? 1f : 0f;
}
}
}
internal class CopyObservationTensorsGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
int m_SensorIndex;
public CopyObservationTensorsGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void SetSensorIndex(int index)
{
m_SensorIndex = index;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
TrainingTensorGenerator.CopyTensorToBatch(transitions[index].state[m_SensorIndex], tensorProxy, index);
}
}
}
internal class CopyNextObservationTensorsGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
int m_SensorIndex;
public CopyNextObservationTensorsGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void SetSensorIndex(int index)
{
m_SensorIndex = index;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
TrainingTensorGenerator.CopyTensorToBatch(transitions[index].nextState[m_SensorIndex], tensorProxy, index);
}
}
}
internal class ConstantGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
float m_Const;
public ConstantGenerator(ITensorAllocator allocator, float c)
{
m_Allocator = allocator;
m_Const = c;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
// A constant input (e.g. gamma or the learning rate) is a single-element
// tensor regardless of the batch size, so allocate and write it once.
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
tensorProxy.data[0] = m_Const;
}
}
internal class TrainingBatchSizeGenerator : TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public TrainingBatchSizeGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
tensorProxy.data[0] = batchSize;
}
}
internal class TrainingStateGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public TrainingStateGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
// The training state is shared across the batch, so a single copy fills the tensor.
TensorUtils.CopyTensor(trainingState, tensorProxy);
}
}
}
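
The reward, done, gamma and next-observation inputs registered above are exactly the ingredients of a DQN-style temporal-difference target; the loss itself presumably lives inside the training graph rather than in C#. For reference, the target for a single transition under that assumption (illustrative, not code from the branch):

static class TdTarget
{
    // y = reward                                   if the episode ended
    // y = reward + gamma * max_a Q(nextState, a)   otherwise
    public static float Compute(float reward, bool done, float gamma, float[] nextQ)
    {
        if (done) return reward;
        var max = nextQ[0];
        for (var i = 1; i < nextQ.Length; i++)
        {
            if (nextQ[i] > max) max = nextQ[i];
        }
        return reward + gamma * max;
    }
}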

com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs.meta (11 changes)


fileFormatVersion: 2
guid: cca690e21a2fe49b49f636cd4e76e0b4
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs (130 changes)


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
/// <summary>
/// Mapping between the output tensor names and the method that will use the
/// output tensors and the Agents present in the batch to update their action, memories and
/// value estimates.
/// A TensorApplier implements a Dictionary of strings (node names) to an Action.
/// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
/// the current batch.
/// </summary>
internal class TrainingForwardTensorApplier
{
readonly Dictionary<string, TensorApplier.IApplier> m_Dict = new Dictionary<string, TensorApplier.IApplier>();
/// <summary>
/// Returns a new TensorAppliers object.
/// </summary>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="seed"> The seed the Appliers will be initialized with.</param>
/// <param name="allocator"> Tensor allocator</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory used to pass to the inference model.</param>
/// <param name="barracudaModel"></param>
public TrainingForwardTensorApplier(
ActionSpec actionSpec,
int seed,
ITensorAllocator allocator,
object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
if (actionSpec.NumContinuousActions > 0)
{
throw new System.Exception("Cannot do continuous actions");
}
if (actionSpec.NumDiscreteActions != 1)
{
throw new System.Exception("Cannot do multi discrete actions, only single discrete");
}
var model = (Model)barracudaModel;
m_Dict[TensorNames.TrainingOutput] = new MaxActionOutputApplier(actionSpec, seed, allocator);
}
/// <summary>
/// Updates the state of the agents based on the data present in the tensor.
/// </summary>
/// <param name="tensors"> Enumerable of tensors containing the data.</param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated applier.</exception>
public void ApplyTensors(
IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
$"Unknown tensorProxy expected as output : {tensor.name}");
}
m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
}
}
}
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
readonly ActionSpec m_ActionSpec;
public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
{
m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
{
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
}
var discreteBuffer = actionBuffer.DiscreteActions;
// Greedy selection: take the index of the largest action value.
var maxIndex = 0;
var maxValue = float.MinValue;
for (var j = 0; j < actionSpaceSize; j++)
{
var value = tensorProxy.data[agentIndex, j];
if (value > maxValue)
{
maxValue = value;
maxIndex = j;
}
}
discreteBuffer[0] = maxIndex;
}
agentIndex++;
}
}
}
}

com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs.meta (11 changes)


fileFormatVersion: 2
guid: eaafcce9c7c794667bc726e40e420824
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs.meta (11 changes)


fileFormatVersion: 2
guid: 03ace8815cd804ee994a5068f618b845
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs (84 changes)


// Policy for C# training
using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies
{
internal class TrainingPolicy : IPolicy
{
protected TrainingModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;
ActionSpec m_ActionSpec;
string m_BehaviorName;
AgentInfo m_LastInfo;
IReadOnlyList<TensorProxy> m_LastObservations;
ReplayBuffer m_buffer;
IReadOnlyList<TensorProxy> m_CurrentObservations;
/// <inheritdoc />
public TrainingPolicy(
ActionSpec actionSpec,
string behaviorName,
NNModel model
)
{
var trainer = Academy.Instance.GetOrCreateTrainer(behaviorName, actionSpec, model);
m_ModelRunner = trainer.TrainerModelRunner;
m_buffer = trainer.Buffer;
m_CurrentObservations = m_ModelRunner.GetInputTensors();
m_BehaviorName = behaviorName;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
m_AgentId = info.episodeId;
m_ModelRunner.PutObservations(info, sensors);
m_ModelRunner.GetObservationTensors(m_CurrentObservations, info, sensors);
if (m_LastObservations != null)
{
m_buffer.Push(m_LastInfo, m_LastObservations, m_CurrentObservations);
}
else if (m_buffer.Count == 0)
{
// hack
m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations);
}
m_LastInfo = info;
m_LastObservations = m_CurrentObservations;
if (info.done)
{
m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations); // dummy next_state
m_LastObservations = null;
}
}
/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
m_ModelRunner.DecideBatch();
m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
return ref m_LastActionBuffer;
}
public void Dispose()
{
}
}
}
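
RequestDecision assembles transitions across consecutive calls: the observations captured on the previous call become the transition's state, the fresh ones its nextState, and the previous AgentInfo supplies the matching action, reward and done flag. As a timeline (a reading of the code above; step indices are illustrative):

// call t:   GetObservationTensors -> obs_t
//           Push(info_{t-1}, obs_{t-1}, obs_t)         // info carries action a_{t-1} and reward
//           if info_t.done: Push(info_t, obs_t, obs_t) // dummy next state, then start over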

com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs.meta (11 changes)


fileFormatVersion: 2
guid: 30a25b3276c294e5eb07b57fc1af4bdb
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs (248 changes)


// ModelRunner for C# training.
using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using UnityEngine;
using Unity.MLAgents.Inference.Utils;
namespace Unity.MLAgents
{
internal class TrainingModelRunner
{
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
TensorProxy m_TrainingState;
ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TrainingTensorGenerator m_TrainingTensorGenerator;
TrainingForwardTensorApplier m_TensorApplier;
Model m_Model;
IWorker m_Engine;
bool m_Verbose = false;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_TrainingInputs;
List<TensorProxy> m_TrainingOutputs;
Dictionary<string, Tensor> m_InputsByName;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
bool m_ObservationsInitialized;
bool m_TrainingObservationsInitialized;
ReplayBuffer m_Buffer;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for
/// the agents
/// </summary>
/// <param name="model"> The Barracuda model to load </param>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="inferenceDevice"> Inference execution device. CPU is the fastest
/// option for most of ML Agents models. </param>
/// <param name="seed"> The seed that will be used to initialize the RandomNormal
/// and Multinomial objects used when running inference.</param>
/// <exception cref="UnityAgentsException">Throws an error when the model is null
/// </exception>
public TrainingModelRunner(
ActionSpec actionSpec,
NNModel model,
ReplayBuffer buffer,
TrainerConfig config,
int seed = 0)
{
Model barracudaModel;
m_TensorAllocator = new TensorCachingAllocator();
// barracudaModel = Barracuda.SomeModelBuilder.CreateModel();
barracudaModel = ModelLoader.Load(model);
m_Model = barracudaModel;
WorkerFactory.Type executionDevice = WorkerFactory.Type.CSharpBurst;
m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
m_TrainingInputs = barracudaModel.GetTrainingInputTensors();
m_OutputNames = barracudaModel.GetOutputNames();
InitializeTrainingState(barracudaModel);
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TrainingTensorGenerator = new TrainingTensorGenerator(
seed, m_TensorAllocator, config.learningRate, config.gamma, barracudaModel);
m_TensorApplier = new TrainingForwardTensorApplier(
actionSpec, seed, m_TensorAllocator, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_TrainingOutputs = new List<TensorProxy>();
m_Buffer = buffer;
}
void InitializeTrainingState(Model barracudaModel)
{
m_TrainingState = new TensorProxy
{
data = barracudaModel.GetTensorByName(TensorNames.InitialTrainingState)
};
}
void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
{
m_InputsByName.Clear();
for (var i = 0; i < infInputs.Count; i++)
{
var inp = infInputs[i];
m_InputsByName[inp.name] = inp.data;
}
}
public void Dispose()
{
if (m_Engine != null)
m_Engine.Dispose();
m_TensorAllocator?.Reset(false);
}
void FetchBarracudaOutputs(string[] names)
{
m_TrainingOutputs.Clear();
foreach (var n in names)
{
var output = m_Engine.PeekOutput(n);
m_TrainingOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
}
}
public void PutObservations(AgentInfo info, List<ISensor> sensors)
{
m_Infos.Add(new AgentInfoSensorsPair
{
agentInfo = info,
sensors = sensors
});
// We add the episodeId to this list to maintain the order in which the decisions were requested
m_OrderedAgentsRequestingDecisions.Add(info.episodeId);
if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
}
if (info.done)
{
// If the agent is done, we remove the key from the last action dictionary since no action
// should be taken.
m_LastActionsReceived.Remove(info.episodeId);
}
}
public void GetObservationTensors(IReadOnlyList<TensorProxy> tensors, AgentInfo info, List<ISensor> sensors)
{
if (!m_ObservationsInitialized)
{
// Initialize the observation tensors from this agent's sensors on first use.
m_TensorGenerator.InitializeObservations(sensors, m_TensorAllocator);
m_ObservationsInitialized = true;
}
var infoSensorPair = new AgentInfoSensorsPair
{
agentInfo = info,
sensors = sensors
};
m_TensorGenerator.GenerateTensors(tensors, 1, new List<AgentInfoSensorsPair> { infoSensorPair });
}
public IReadOnlyList<TensorProxy> GetInputTensors()
{
return m_Model.GetInputTensors();
}
public void DecideBatch()
{
var currentBatchSize = m_Infos.Count;
if (currentBatchSize == 0)
{
return;
}
if (!m_ObservationsInitialized)
{
// Just grab the first agent in the collection (any will suffice, really).
// We check for an empty Collection above, so this will always return successfully.
var firstInfo = m_Infos[0];
m_TensorGenerator.InitializeObservations(firstInfo.sensors, m_TensorAllocator);
m_ObservationsInitialized = true;
}
// Prepare the input tensors to be fed into the engine
m_TensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Infos);
m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Buffer.SampleDummyBatch(currentBatchSize), m_TrainingState);
PrepareBarracudaInputs(m_TrainingInputs);
// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_OutputNames);
// Update the outputs
m_TensorApplier.ApplyTensors(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
m_Infos.Clear();
m_OrderedAgentsRequestingDecisions.Clear();
}
public void UpdateModel(List<Transition> transitions)
{
var currentBatchSize = transitions.Count;
if (currentBatchSize == 0)
{
return;
}
m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, transitions, m_TrainingState, true);
PrepareBarracudaInputs(m_TrainingInputs);
// Execute the Model
m_Engine.Execute(m_InputsByName);
// Update the model
FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
m_TrainingState = m_TrainingOutputs[0];
}
public ActionBuffers GetAction(int agentId)
{
if (m_LastActionsReceived.ContainsKey(agentId))
{
return m_LastActionsReceived[agentId];
}
return ActionBuffers.Empty;
}
// void PrintTensor(TensorProxy tensor)
// {
// Debug.Log($"Print tensor {tensor.name}");
// for (var b = 0; b < tensor.data.batch; b++)
// {
// var message = new List<float>();
// for (var i = 0; i < tensor.data.height; i++)
// {
// for (var j = 0; j < tensor.data.width; j++)
// {
// for(var k = 0; k < tensor.data.channels; k++)
// {
// message.Add(tensor.data[b, i, j, k]);
// }
// }
// }
// Debug.Log(string.Join(", ", message));
// }
// }
}
}
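
Taken together, the new classes wire up this per-step loop (a summary of the code above, not documentation from the branch):

// 1. TrainingPolicy.RequestDecision -> TrainingModelRunner.PutObservations(...)
//                                      and ReplayBuffer.Push(...) for the completed transition
// 2. TrainingPolicy.DecideAction    -> TrainingModelRunner.DecideBatch()
//                                      (forward pass; greedy actions via MaxActionOutputApplier)
// 3. Academy's TrainerUpdate event  -> Trainer.Update()
//                                      -> ReplayBuffer.SampleBatch(batchSize)
//                                      -> TrainingModelRunner.UpdateModel(batch)
//                                         (training pass; refreshes the training_state tensor)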