using System;
using System.Linq;
using System.Collections.Generic;

namespace Unity.MLAgents
{
    /// <summary>
    /// A basic class implementation of MultiAgentGroup.
    /// </summary>
    /// <remarks>
    /// The group keeps a set of registered agents and forwards group-level calls
    /// (rewards, episode end, episode interruption) to each of them. It also subscribes
    /// to each registered agent's <c>OnAgentDisabled</c> event so that the agent is
    /// unregistered when that event fires.
    /// </remarks>
    public class SimpleMultiAgentGroup : IMultiAgentGroup, IDisposable
    {
        // Identifier handed out once, at construction, by the shared group id counter.
        readonly int m_Id = MultiAgentGroupIdCounter.GetGroupId();

        // Agents currently registered to this group. A set, so registering the same
        // agent twice has no additional effect.
        readonly HashSet<Agent> m_Agents = new HashSet<Agent>();

        /// <summary>
        /// Unregisters every agent that is currently registered to this group.
        /// </summary>
        /// <remarks>
        /// Each agent is removed through <see cref="UnregisterAgent"/>, so its group
        /// reference is cleared and the <c>OnAgentDisabled</c> subscription is released.
        /// </remarks>
        public virtual void Dispose()
        {
            // UnregisterAgent mutates m_Agents, so a foreach over the set is not possible
            // here; instead keep removing an arbitrary element until the set is empty.
            // An override of UnregisterAgent must still remove the agent from the set,
            // otherwise this loop does not terminate.
            while (m_Agents.Count > 0)
            {
                UnregisterAgent(m_Agents.First());
            }
        }

        /// <inheritdoc />
        public virtual void RegisterAgent(Agent agent)
        {
            if (!m_Agents.Contains(agent))
            {
                // The group is assigned before the agent is added to the set, so if
                // SetMultiAgentGroup throws, the set and the event subscription are untouched.
                agent.SetMultiAgentGroup(this);
                m_Agents.Add(agent);
                agent.OnAgentDisabled += UnregisterAgent;
            }
        }

        /// <inheritdoc />
        public virtual void UnregisterAgent(Agent agent)
        {
            // Also used as the OnAgentDisabled handler; agents that are not (or no longer)
            // registered are ignored.
            if (m_Agents.Contains(agent))
            {
                agent.SetMultiAgentGroup(null);
                m_Agents.Remove(agent);
                agent.OnAgentDisabled -= UnregisterAgent;
            }
        }

        /// <inheritdoc />
        public int GetId()
        {
            return m_Id;
        }

        /// <summary>
        /// Get list of all agents currently registered to this MultiAgentGroup.
        /// </summary>
        /// <returns>
        /// List of agents registered to the MultiAgentGroup.
        /// </returns>
        public IReadOnlyCollection<Agent> GetRegisteredAgents()
        {
            // Returns a read-only view over the live set, not a copy.
            return (IReadOnlyCollection<Agent>)m_Agents;
        }

        /// <summary>
        /// Increments the group rewards for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This function increases or decreases the group rewards by a given amount for all agents
        /// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned
        /// to the current step with a specific value rather than increasing or decreasing it.
        ///
        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
        /// Every agent in the group will receive the same group reward no matter whether the
        /// agent's actions directly lead to the reward. Group rewards are meant to reinforce agents
        /// to act in the group's best interest instead of individual ones.
        /// Group rewards are treated differently than individual agent rewards during training, so
        /// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group.
        /// </remarks>
        /// <param name="reward">Incremental group reward value.</param>
        public void AddGroupReward(float reward)
        {
            foreach (var agent in m_Agents)
            {
                agent.AddGroupReward(reward);
            }
        }

        /// <summary>
        /// Set the group rewards for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This function replaces any group rewards given during the current step for all agents in the group.
        /// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than
        /// overriding it.
        ///
        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
        /// Every agent in the group will receive the same group reward no matter whether the
        /// agent's actions directly lead to the reward. Group rewards are meant to reinforce agents
        /// to act in the group's best interest instead of individual ones.
        /// Group rewards are treated differently than individual agent rewards during training, so
        /// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group.
        /// </remarks>
        /// <param name="reward">The new value of the group reward.</param>
        public void SetGroupReward(float reward)
        {
            foreach (var agent in m_Agents)
            {
                agent.SetGroupReward(reward);
            }
        }

        /// <summary>
        /// End episodes for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode can no longer continue, such as when the group
        /// reaches the goal or fails at the task.
        /// </remarks>
        public void EndGroupEpisode()
        {
            foreach (var agent in m_Agents)
            {
                agent.EndEpisode();
            }
        }

        /// <summary>
        /// Indicate that the episode is over but not due to the "fault" of the group.
        /// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a
        /// slightly different effect on training.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode could continue, but has gone on for
        /// a sufficient number of steps, such as if the environment hits some maximum number of steps.
        /// </remarks>
        public void GroupEpisodeInterrupted()
        {
            foreach (var agent in m_Agents)
            {
                agent.EpisodeInterrupted();
            }
        }
    }
}