浏览代码

Step sensor for Heuristic policy (#3542)

/bug-failed-api-check
GitHub 5 年前
当前提交
a27117a4
共有 3 个文件被更改,包括 135 次插入4 次删除
  1. 2
      com.unity.ml-agents/Runtime/Agent.cs
  2. 89
      com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
  3. 48
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs

2
com.unity.ml-agents/Runtime/Agent.cs


/// </returns>
public virtual float[] Heuristic()
{
Debug.LogWarning("Heuristic method called but not implemented. Return placeholder actions.");
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
var param = m_PolicyFactory.brainParameters;
var actionSize = param.vectorActionSpaceType == SpaceType.Continuous ?
param.vectorActionSize[0] :

89
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


using System.Collections.Generic;
using System;
using System.Collections;
using MLAgents.Sensors;
namespace MLAgents.Policies

// Delegate invoked by RequestDecision; its float[] result is stored in m_LastDecision.
Func<float[]> m_Heuristic;
// Result of the most recent m_Heuristic invocation.
float[] m_LastDecision;
// Reusable adapter passed to ISensor.Write when stepping uncompressed sensors.
WriteAdapter m_WriteAdapter = new WriteAdapter();
// Throwaway write target: m_WriteAdapter is pointed at this list so written observations are discarded.
NullList m_NullList = new NullList();
/// <inheritdoc />
public HeuristicPolicy(Func<float[]> heuristic)
{

/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
    // Sensors are stepped first; only afterwards is the heuristic queried.
    StepSensors(sensors);

    var decision = m_Heuristic();
    m_LastDecision = decision;
}

/// <summary>
/// Intentionally empty: this method releases nothing.
/// </summary>
public void Dispose()
{
}
/// <summary>
/// A write-only sink that implements <see cref="IList{T}"/> of float without storing anything.
/// Mutating members are no-ops, queries report an empty list, and the indexer always reads 0.
/// Enumeration and CopyTo are not implemented and throw.
/// It exists only so observations can be "written" somewhere and then discarded.
/// </summary>
class NullList : IList<float>
{
    // Never assigned, so these keep their defaults (0 and false).
    public int Count { get; }
    public bool IsReadOnly { get; }

    // Reads always yield 0; writes are dropped.
    public float this[int index]
    {
        get { return 0.0f; }
        set { }
    }

    // Mutators: accept the call and do nothing.
    public void Add(float item) { }

    public void Insert(int index, float item) { }

    public void RemoveAt(int index) { }

    public void Clear() { }

    public bool Remove(float item) => false;

    // Queries: nothing is ever stored, so nothing is ever found.
    public bool Contains(float item) => false;

    public int IndexOf(float item) => -1;

    // Unsupported operations.
    public void CopyTo(float[] array, int arrayIndex)
    {
        throw new NotImplementedException();
    }

    public IEnumerator<float> GetEnumerator()
    {
        throw new NotImplementedException();
    }

    // Forwards to the generic enumerator, and therefore throws as well.
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
/// <summary>
/// Exercises every sensor once: uncompressed sensors get ISensor.Write called,
/// all others get ISensor.GetCompressedObservation called.
/// The produced data is thrown away; the point is that sensors are used the same
/// way here as they are during training and inference.
/// </summary>
/// <param name="sensors">The sensors to step.</param>
void StepSensors(List<ISensor> sensors)
{
    foreach (var currentSensor in sensors)
    {
        var usesCompression = currentSensor.GetCompressionType() != SensorCompressionType.None;
        if (usesCompression)
        {
            // Return value is deliberately ignored.
            currentSensor.GetCompressedObservation();
            continue;
        }

        // Point the shared adapter at the discard list before letting the sensor write.
        m_WriteAdapter.SetTarget(m_NullList, currentSensor.GetObservationShape(), 0);
        currentSensor.Write(m_WriteAdapter);
    }
}
}
}

48
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(this, policy);
}
/// <summary>
/// Uses reflection to read the non-public instance field "m_Brain" declared on Agent
/// and returns its value cast to IPolicy.
/// </summary>
internal IPolicy GetPolicy()
{
    const BindingFlags nonPublicInstance = BindingFlags.NonPublic | BindingFlags.Instance;
    var brainField = typeof(Agent).GetField("m_Brain", nonPublicInstance);
    var brainValue = brainField.GetValue(this);
    return (IPolicy)brainValue;
}
// Presumably per-callback call counters (by name); the code that updates them is outside this view.
public int initializeAgentCalls;
public int collectObservationsCalls;
public int collectObservationsCallsSinceLastReset;

// Incremented each time the Heuristic() override runs.
public int heuristicCalls;
// Public so tests can read each sensor's call counters.
public TestSensor sensor1;
public TestSensor sensor2;
var sensor1 = new TestSensor("testsensor1");
var sensor2 = new TestSensor("testsensor2");
sensor1 = new TestSensor("testsensor1");
sensor2 = new TestSensor("testsensor2");
sensor2.compressionType = SensorCompressionType.PNG;
sensors.Add(sensor2);
sensors.Add(sensor1);

/// <summary>
/// Records that the heuristic was invoked and returns a new empty action array.
/// </summary>
/// <returns>A newly allocated float array of length zero.</returns>
public override float[] Heuristic()
{
    heuristicCalls += 1;
    var emptyActions = new float[0];
    return emptyActions;
}
}

// Name of this sensor; presumably set from the constructor argument (constructor body not visible here).
public string sensorName;
// Incremented on every Write call.
public int numWriteCalls;
// Counter of compressed-observation related calls; tests compare it against the number of steps.
public int numCompressedCalls;
// Compression type for this sensor; defaults to None and may be overridden by the test setup.
public SensorCompressionType compressionType = SensorCompressionType.None;
public TestSensor(string n)
{

/// <summary>
/// Counts the call and otherwise does nothing; the adapter is never written to.
/// </summary>
/// <param name="adapter">Unused.</param>
/// <returns>Always 0.</returns>
public int Write(WriteAdapter adapter)
{
    // Only the invocation is recorded.
    numWriteCalls += 1;
    return 0;
}

numCompressedCalls++;
return SensorCompressionType.None;
return compressionType;
}
public string GetName()

Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls);
Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset);
}
}
/// <summary>
/// Verifies that an Agent using a HeuristicPolicy has its heuristic invoked and its
/// sensors stepped exactly once per Academy environment step: the uncompressed sensor
/// via Write, the compressed sensor via its compressed-observation path.
/// </summary>
[Test]
public void TestHeuristicPolicyStepsSensors()
{
    // Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
    var agentGo1 = new GameObject("TestAgent");
    // AddComponent returns the component it attached, so no separate GetComponent lookup is needed.
    var agent1 = agentGo1.AddComponent<TestAgent>();
    var aca = Academy.Instance;

    // Request a decision on every step so the policy is queried each time.
    var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
    decisionRequester.DecisionPeriod = 1;
    decisionRequester.Awake();

    agent1.LazyInitialize();
    // NUnit's AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual(typeof(HeuristicPolicy), agent1.GetPolicy().GetType());

    var numSteps = 10;
    for (var i = 0; i < numSteps; i++)
    {
        aca.EnvironmentStep();
    }

    Assert.AreEqual(numSteps, agent1.heuristicCalls);
    Assert.AreEqual(numSteps, agent1.sensor1.numWriteCalls);
    Assert.AreEqual(numSteps, agent1.sensor2.numCompressedCalls);
}
}
}
正在加载...
取消
保存