浏览代码

Add a timeScale modifier and fix a bug (#5186)

/ai-hw-2021
GitHub 4 年前
当前提交
c1c7360b
共有 12 个文件被更改,包括 219 次插入48 次删除
  1. 14
      Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab
  2. 44
      Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity
  3. 1
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs
  4. 44
      Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
  5. 4
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
  6. 2
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs
  7. 8
      com.unity.ml-agents/Runtime/Training/ReplayBuffer.cs
  8. 21
      com.unity.ml-agents/Runtime/Training/Trainer.cs
  9. 24
      com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs
  10. 13
      com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs
  11. 81
      com.unity.ml-agents/Runtime/Training/MyTimeScaleSetting.cs
  12. 11
      com.unity.ml-agents/Runtime/Training/MyTimeScaleSetting.cs.meta

14
Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab


m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!54 &54597526346971362
Rigidbody:
m_ObjectHideFlags: 0

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1321468028730240
GameObject:
m_ObjectHideFlags: 0

VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 5022602860645237092, guid: 0f7c224f06ff5488ca51f68add34b904, type: 3}
m_Model: {fileID: 5022602860645237092, guid: 115ee9c4578384e26b8fd97aab2d76b2, type: 3}
m_InferenceDevice: 2
m_BehaviorType: 3
m_BehaviorName: 3DBall

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1854695166504686
GameObject:
m_ObjectHideFlags: 0

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1859240399150782
GameObject:
m_ObjectHideFlags: 0

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1999020414315134
GameObject:
m_ObjectHideFlags: 0

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}

44
Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity


--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0
serializedVersion: 12
serializedVersion: 11
m_GIWorkflowMode: 0
m_GISettings:
serializedVersion: 2

m_TrainingDataDestination: TrainingData
m_LightProbeSampleCountMultiplier: 4
m_LightingDataAsset: {fileID: 0}
m_LightingSettings: {fileID: 4890085278179872738, guid: 733585ffa1510453eacdfe03bb27056e,
type: 2}
m_UseShadowmask: 1
--- !u!196 &4
NavMeshSettings:
serializedVersion: 2

manualTileSize: 0
tileSize: 256
accuratePlacement: 0
maxJobWorkers: 0
preserveTilesOutsideBounds: 0
debug:
m_Flags: 0
m_NavMeshData: {fileID: 0}

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

m_Component:
- component: {fileID: 1583402088}
- component: {fileID: 1583402090}
- component: {fileID: 1583402089}
m_Layer: 0
m_Name: Ball3DSettings
m_TagString: Untagged

m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1583402089
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1583402087}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: b1f466227168940b7ac84e337e9fcc8a, type: 3}
m_Name:
m_EditorClassIdentifier:
m_TimeScale: 1
m_Greedy: 0
--- !u!114 &1583402090
MonoBehaviour:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 0
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder

1
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


int d_act = actionBuffers.DiscreteActions[0];
var actionZ = (d_act % 10) * 1.0f / 4.5f - 1;
var actionX = (d_act / 10) * 1.0f / 4.5f - 1;
// UnityEngine.Debug.Log((d_act / 10) +" "+ (d_act % 10) +" "+d_act);
// var actionZ = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
// var actionX = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f);

44
Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity


m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 10
serializedVersion: 12
m_Resolution: 2
m_BakeResolution: 40
m_AtlasSize: 1024

m_CompAOExponentDirect: 0
m_ExtractAmbientOcclusion: 0
m_Padding: 2
m_LightmapParameters: {fileID: 0}
m_LightmapsBakeMode: 1

m_PVRDirectSampleCount: 32
m_PVRSampleCount: 500
m_PVRBounces: 2
m_PVREnvironmentSampleCount: 500
m_PVREnvironmentReferencePointCount: 2048
m_PVRFilteringMode: 2
m_PVRDenoiserTypeDirect: 0
m_PVRDenoiserTypeIndirect: 0
m_PVRDenoiserTypeAO: 0
m_PVRFilteringMode: 1
m_PVREnvironmentMIS: 0
m_PVRCulling: 1
m_PVRFilteringGaussRadiusDirect: 1
m_PVRFilteringGaussRadiusIndirect: 5

m_PVRFilteringAtrousPositionSigmaAO: 1
m_ShowResolutionOverlay: 1
m_ExportTrainingData: 0
m_TrainingDataDestination: TrainingData
m_LightProbeSampleCountMultiplier: 4
m_LightingDataAsset: {fileID: 0}
m_UseShadowmask: 1
--- !u!196 &4

m_ClearFlags: 2
m_BackGroundColor: {r: 0.46666667, g: 0.5647059, b: 0.60784316, a: 1}
m_projectionMatrixMode: 1
m_GateFitMode: 2
m_FOVAxisMode: 0
m_GateFitMode: 2
m_FocalLength: 50
m_NormalizedViewPortRect:
serializedVersion: 2

propertyPath: m_BrainParameters.VectorObservationSize
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114502619508238574, guid: c5eb289873aca4f5a8cc59c7464ab7c1,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 5022602860645237092, guid: b6df909c6da0e49d39650555d30cae9d,
type: 3}
- target: {fileID: 114502619508238574, guid: c5eb289873aca4f5a8cc59c7464ab7c1,
type: 3}
propertyPath: m_BehaviorType
value: 3
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c5eb289873aca4f5a8cc59c7464ab7c1, type: 3}
--- !u!1 &1889211226

m_Component:
- component: {fileID: 1889211228}
- component: {fileID: 1889211227}
- component: {fileID: 1889211229}
m_Layer: 0
m_Name: BasicSettings
m_TagString: Untagged

m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1889211229
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1889211226}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: b1f466227168940b7ac84e337e9fcc8a, type: 3}
m_Name:
m_EditorClassIdentifier:
m_TimeScale: 1
m_Greedy: 1

4
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs


switch (movement)
{
case 1:
direction = -1;
direction = 1;
direction = 1;
direction = -1;
break;
}

2
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


{
return;
}
if (Academy.Instance.IsCommunicatorOn)
if (Academy.Instance.IsCommunicatorOn || true)
{
m_Agent?.RequestDecision();
}

8
com.unity.ml-agents/Runtime/Training/ReplayBuffer.cs


{
if (m_Buffer.Count < m_MaxSize)
{
m_Buffer.Add(new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState});
m_Buffer.Add(new Transition() { state = state, action = info.storedActions, reward = info.reward, done = info.done, nextState = nextState });
m_Buffer[m_CurrentIndex] = new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState};
m_Buffer[m_CurrentIndex] = new Transition() { state = state, action = info.storedActions, reward = info.reward, done = info.done, nextState = nextState };
}
m_CurrentIndex += 1;
m_CurrentIndex = m_CurrentIndex % m_MaxSize;

private List<int> SampleIndex(int batchSize)
{
if (batchSize > m_Buffer.Count * 2)
{
return new int[batchSize].ToList();
}
Random random = new Random();
HashSet<int> index = new HashSet<int>();

21
com.unity.ml-agents/Runtime/Training/Trainer.cs


{
internal class TrainerConfig
{
public int bufferSize = 1024;
public int batchSize = 128;
public int bufferSize = 500000;
public int batchSize = 100;
public float learningRate = 0.005f;
public float learningRate = 0.0001f;
public int updatePeriod = 10;
internal class Trainer: IDisposable
internal class Trainer : IDisposable
{
ReplayBuffer m_Buffer;
TrainingModelRunner m_ModelRunner;

int m_TrainingStep;
public Trainer(string behaviorName, ActionSpec actionSpec, NNModel model, int seed=0, TrainerConfig config=null)
public Trainer(string behaviorName, ActionSpec actionSpec, NNModel model, int seed = 0, TrainerConfig config = null)
{
m_Config = config ?? new TrainerConfig();
m_behaviorName = behaviorName;

public void Update()
{
if (!MyTimeScaleSetting.instance.IsTraining)
{
return;
}
if (m_TrainingStep % m_Config.updatePeriod != 0)
{
m_TrainingStep += 1;
return;
}
if (m_Buffer.Count < m_Config.batchSize * 2)
{
return;

m_ModelRunner.UpdateModel(samples);
// UnityEngine.Debug.Log("Update");
// Update target network
// if (m_TrainingStep % m_Config.updateTargetFreq == 0)

24
com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs


internal class MaxActionOutputApplier : TensorApplier.IApplier
{
readonly ActionSpec m_ActionSpec;
int saved_id = -1;
System.Random rand = new System.Random();
// MyTimeScaleSetting greedySetting;
// greedySetting = UnityEngine.GameObject.FindObjectsOfType<MyTimeScaleSetting>()[0];
if (saved_id == -1)
{
saved_id = actionIds[0];
}
var agentIndex = 0;
var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];

}
var discreteBuffer = actionBuffer.DiscreteActions;
var maxIndex = 0;
var maxValue = 0;
var maxValue = float.MinValue;
var value = (int)tensorProxy.data[agentIndex, j];
var value = tensorProxy.data[agentIndex, j];
if (value > maxValue)
{
maxIndex = j;

// Greedy
// if (saved_id != agentId){
if (rand.NextDouble() < MyTimeScaleSetting.instance.GreedyEpislon)
{
discreteBuffer[0] = rand.Next((int)actionSpaceSize);
// UnityEngine.Debug.Log(discreteBuffer[0]);
}
// }
}
agentIndex++;
}

13
com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs


m_TrainingInputs = barracudaModel.GetTrainingInputTensors();
List<TensorProxy> infTensors = new List<TensorProxy>();
foreach(var tensor in m_TrainingInputs)
foreach (var tensor in m_TrainingInputs)
{
if (tensor.name == TensorNames.Observations || tensor.name == TensorNames.BatchSizePlaceholder)
{

m_InferenceInputs = (IReadOnlyList<TensorProxy>) infTensors;
m_InferenceInputs = (IReadOnlyList<TensorProxy>)infTensors;
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TrainingTensorGenerator = new TrainingTensorGenerator(

// Update the model
FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
m_TrainingState = m_TrainingOutputs[0];
TensorUtils.CopyTensor(m_TrainingOutputs[0], m_TrainingState);
// UnityEngine.Debug.Log(m_TrainingState.data[0]);
// m_TrainingState = m_TrainingOutputs[0];
// for (int i = 0; i < m_TrainingOutputs[0].data.length; i++){
// UnityEngine.Debug.Log(m_TrainingOutputs[0].data[i]);
// }
// throw new System.Exception("STOP");
}
public ActionBuffers GetAction(int agentId)

81
com.unity.ml-agents/Runtime/Training/MyTimeScaleSetting.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Scene-level singleton holding a few runtime knobs: the Unity time scale,
/// a greedy-exploration value, and a flag that turns training on or off.
/// </summary>
/// <remarks>
/// The component can be placed in a scene (so its values are editable in the
/// Inspector) or it is created on demand the first time <see cref="instance"/>
/// is read. Because the host GameObject is marked with
/// <c>DontDestroyOnLoad</c>, loading a scene that contains another copy would
/// otherwise leave two live components both writing <c>Time.timeScale</c>
/// every frame; duplicates are therefore retired in <c>Awake</c>.
/// </remarks>
public class MyTimeScaleSetting : MonoBehaviour
{
    // Cached singleton so the scene does not have to be searched on every access.
    private static MyTimeScaleSetting s_Instance = null;

    // True when another instance was already registered at the time this one woke up.
    // Destroy() is deferred, so Start/Update may still be invoked once on a duplicate;
    // this flag keeps those callbacks inert.
    private bool m_IsDuplicate;

    /// <summary>
    /// Returns the shared instance. Looks the component up in the loaded scene(s)
    /// first and, if none exists, creates a new GameObject named
    /// "MyTimeScaleSetting" carrying a fresh component with default values.
    /// </summary>
    public static MyTimeScaleSetting instance
    {
        get
        {
            if (s_Instance == null)
            {
                // Pick up an instance that was placed in the scene, if any.
                s_Instance = FindObjectOfType<MyTimeScaleSetting>();
            }

            // Still nothing: create one so callers never receive null.
            if (s_Instance == null)
            {
                var obj = new GameObject("MyTimeScaleSetting");
                s_Instance = obj.AddComponent<MyTimeScaleSetting>();
            }

            return s_Instance;
        }
    }

    [SerializeField]
    float m_TimeScale = 1f;

    /// <summary>
    /// Time scale applied to <c>Time.timeScale</c>. Setting it stores the value
    /// and applies it immediately; it is also re-applied every frame in Update.
    /// </summary>
    public float MyTimeScale
    {
        get { return m_TimeScale; }
        set
        {
            m_TimeScale = value;
            Time.timeScale = value;
        }
    }

    [SerializeField]
    float m_Greedy = 0f;

    /// <summary>
    /// Greedy-exploration value (the property name's spelling is kept as-is so
    /// existing callers keep compiling).
    /// </summary>
    public float GreedyEpislon
    {
        get { return m_Greedy; }
        set { m_Greedy = value; }
    }

    [SerializeField]
    bool m_Train = true;

    /// <summary>
    /// Whether training is enabled. Defaults to true.
    /// </summary>
    public bool IsTraining
    {
        get { return m_Train; }
        set { m_Train = value; }
    }

    // Registers this component as the singleton, or retires it when another
    // instance is already registered.
    void Awake()
    {
        if (s_Instance != null && s_Instance != this)
        {
            m_IsDuplicate = true;
            // Remove only this component: the host GameObject may carry other,
            // unrelated components that must survive.
            Destroy(this);
            return;
        }

        s_Instance = this;
    }

    // Drop the cached reference when the application (or editor play mode) quits.
    void OnApplicationQuit()
    {
        s_Instance = null;
    }

    // Start is called before the first frame update.
    void Start()
    {
        if (m_IsDuplicate)
        {
            return;
        }

        // Keep the settings object alive across scene loads.
        DontDestroyOnLoad(this.gameObject);
    }

    // Update is called once per frame.
    void Update()
    {
        if (m_IsDuplicate)
        {
            return;
        }

        // Re-apply every frame; presumably so Inspector edits to the serialized
        // field take effect without going through the MyTimeScale setter.
        Time.timeScale = m_TimeScale;
    }
}

11
com.unity.ml-agents/Runtime/Training/MyTimeScaleSetting.cs.meta


fileFormatVersion: 2
guid: b1f466227168940b7ac84e337e9fcc8a
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存