using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Analytics;

namespace Unity.MLAgents.Policies
{
    /// <summary>
    /// The Remote Policy only works when training.
    /// When training your Agents, the RemotePolicy will be controlled by Python.
    /// </summary>
    internal class RemotePolicy : IPolicy
    {
        /// <summary>
        /// Episode id of the agent that most recently requested a decision.
        /// </summary>
        private int m_AgentId;

        /// <summary>
        /// Behavior name used for every call made to the communicator.
        /// </summary>
        private readonly string m_FullyQualifiedBehaviorName;

        /// <summary>
        /// Action specification registered with the communicator and reported to analytics.
        /// </summary>
        private readonly ActionSpec m_ActionSpec;

        /// <summary>
        /// Backing storage for the reference returned by <see cref="DecideAction"/>.
        /// </summary>
        private ActionBuffers m_LastActionBuffer;

        /// <summary>
        /// Set once the one-time analytics event has been sent.
        /// </summary>
        private bool m_AnalyticsSent;

        /// <summary>
        /// Communicator taken from the Academy at construction time; may be null, in which
        /// case every communicator call in this class is skipped.
        /// </summary>
        internal ICommunicator m_Communicator;

        /// <summary>
        /// List of actuators, only used for analytics.
        /// </summary>
        // NOTE(review): the element types IActuator / ISensor used in this class were restored
        // from the imported namespaces — confirm against the IPolicy declaration.
        private readonly IList<IActuator> m_Actuators;

        /// <summary>
        /// Creates the policy and, when a communicator is available, subscribes the behavior to it.
        /// </summary>
        /// <param name="actionSpec">Action specification for the behavior.</param>
        /// <param name="actuators">Actuators of the agent; only forwarded to analytics.</param>
        /// <param name="fullyQualifiedBehaviorName">Name under which the behavior is subscribed.</param>
        public RemotePolicy(
            ActionSpec actionSpec,
            IList<IActuator> actuators,
            string fullyQualifiedBehaviorName)
        {
            m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
            m_Communicator = Academy.Instance.Communicator;
            m_Communicator?.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
            m_ActionSpec = actionSpec;
            m_Actuators = actuators;
        }

        /// <inheritdoc />
        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
        {
            // Analytics are reported only once, on the first decision request, because the
            // sensors are not available at construction time.
            if (!m_AnalyticsSent)
            {
                m_AnalyticsSent = true;
                TrainingAnalytics.RemotePolicyInitialized(
                    m_FullyQualifiedBehaviorName,
                    sensors,
                    m_ActionSpec,
                    m_Actuators
                );
            }

            // Remember which agent asked so DecideAction can look up its actions.
            m_AgentId = info.episodeId;
            m_Communicator?.PutObservations(m_FullyQualifiedBehaviorName, info, sensors);
        }

        /// <summary>
        /// Asks the communicator to decide the pending batch and fetches the actions for the
        /// agent recorded by the last <see cref="RequestDecision"/> call.
        /// </summary>
        /// <returns>
        /// A read-only reference to the stored action buffers; <c>ActionBuffers.Empty</c> when
        /// there is no communicator or no actions were returned.
        /// </returns>
        public ref readonly ActionBuffers DecideAction()
        {
            m_Communicator?.DecideBatch();
            var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);

            // The result is stored in a field because a ref return must not point at a local.
            m_LastActionBuffer = actions ?? ActionBuffers.Empty;
            return ref m_LastActionBuffer;
        }

        /// <summary>
        /// Nothing to release; the body is intentionally empty.
        /// </summary>
        public void Dispose()
        {
        }
    }
}