using System;
using System.Collections;
using System.Collections.Generic;
using MLAgents.Sensors;

namespace MLAgents.Policies
{
    /// <summary>
    /// The Heuristic Policy uses a hard-coded heuristic method
    /// to take decisions each time the RequestDecision method is
    /// called.
    /// </summary>
    internal class HeuristicPolicy : IPolicy
    {
        // Callback that produces the action vector; invoked once per RequestDecision call.
        readonly Func<float[]> m_Heuristic;

        // Result of the most recent heuristic invocation; null until RequestDecision has run.
        float[] m_LastDecision;

        // Adapter handed to ISensor.Write; it is re-targeted at m_NullList for every sensor.
        WriteAdapter m_WriteAdapter = new WriteAdapter();

        // Sink that silently discards every observation written to it.
        readonly NullList m_NullList = new NullList();

        /// <summary>
        /// Creates a policy whose decisions come from the given heuristic callback.
        /// </summary>
        /// <param name="heuristic">
        /// Function invoked on every <see cref="RequestDecision"/> call; its return
        /// value is stored and later handed back by <see cref="DecideAction"/>.
        /// </param>
        public HeuristicPolicy(Func<float[]> heuristic)
        {
            m_Heuristic = heuristic;
        }

        /// <summary>
        /// Steps every sensor (discarding the observations) and then invokes the
        /// heuristic, caching its result as the latest decision.
        /// </summary>
        /// <param name="info">Agent information; not read by this policy.</param>
        /// <param name="sensors">Sensors to step before the heuristic is invoked.</param>
        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
        {
            StepSensors(sensors);
            m_LastDecision = m_Heuristic.Invoke();
        }

        /// <summary>
        /// Returns the value produced by the heuristic during the most recent
        /// <see cref="RequestDecision"/> call.
        /// </summary>
        /// <returns>
        /// The cached decision, or null if <see cref="RequestDecision"/> has not
        /// been called yet.
        /// </returns>
        public float[] DecideAction()
        {
            return m_LastDecision;
        }

        /// <summary>
        /// Does nothing.
        /// </summary>
        public void Dispose()
        {
        }

        /// <summary>
        /// Trivial implementation of the IList interface that does nothing.
        /// This is only used for "writing" observations that we will discard.
        /// </summary>
        class NullList : IList<float>
        {
            /// <summary>Enumeration is not implemented; always throws.</summary>
            public IEnumerator<float> GetEnumerator()
            {
                throw new NotImplementedException();
            }

            IEnumerator IEnumerable.GetEnumerator()
            {
                return GetEnumerator();
            }

            /// <summary>Discards the item.</summary>
            public void Add(float item)
            {
            }

            /// <summary>Does nothing; the list never holds any items.</summary>
            public void Clear()
            {
            }

            /// <summary>Always reports that the item is absent.</summary>
            public bool Contains(float item)
            {
                return false;
            }

            /// <summary>Copying is not implemented; always throws.</summary>
            public void CopyTo(float[] array, int arrayIndex)
            {
                throw new NotImplementedException();
            }

            /// <summary>Always reports that nothing was removed.</summary>
            public bool Remove(float item)
            {
                return false;
            }

            /// <summary>Always 0.</summary>
            public int Count => 0;

            /// <summary>Always false.</summary>
            public bool IsReadOnly => false;

            /// <summary>Always reports "not found" (-1).</summary>
            public int IndexOf(float item)
            {
                return -1;
            }

            /// <summary>Discards the item.</summary>
            public void Insert(int index, float item)
            {
            }

            /// <summary>Does nothing.</summary>
            public void RemoveAt(int index)
            {
            }

            /// <summary>
            /// Reads always yield 0; writes are discarded. The index is never validated.
            /// </summary>
            public float this[int index]
            {
                get { return 0.0f; }
                set { }
            }
        }

        /// <summary>
        /// Run ISensor.Write or ISensor.GetCompressedObservation for each sensor.
        /// The output is currently unused, but this makes the sensor usage consistent
        /// between training and inference.
        /// </summary>
        /// <param name="sensors">Sensors to step.</param>
        void StepSensors(List<ISensor> sensors)
        {
            foreach (var sensor in sensors)
            {
                if (sensor.GetCompressionType() == SensorCompressionType.None)
                {
                    // Point the adapter at the discard sink so Write has somewhere to put its output.
                    // NOTE(review): the trailing 0 is presumably the write offset - confirm against WriteAdapter.SetTarget.
                    m_WriteAdapter.SetTarget(m_NullList, sensor.GetObservationShape(), 0);
                    sensor.Write(m_WriteAdapter);
                }
                else
                {
                    // The compressed payload is intentionally ignored; only the call itself matters here.
                    sensor.GetCompressedObservation();
                }
            }
        }
    }
}