using System;
using System.Linq;
using System.Collections.Generic;
namespace Unity.MLAgents
{
/// <summary>
/// A basic class implementation of MultiAgentGroup.
/// </summary>
/// <remarks>
/// Keeps a set of registered agents and forwards group rewards and episode-termination
/// calls to each of them. An agent is unregistered automatically when it raises its
/// <c>OnAgentDisabled</c> event.
/// </remarks>
internal class SimpleMultiAgentGroup : IMultiAgentGroup, IDisposable
{
    // Identifier of this group, obtained once from MultiAgentGroupIdCounter at construction.
    readonly int m_Id = MultiAgentGroupIdCounter.GetGroupId();

    // Agents currently registered to this group.
    readonly HashSet<Agent> m_Agents = new HashSet<Agent>();

    /// <summary>
    /// Unregisters every agent that is still registered to this group.
    /// </summary>
    /// <remarks>
    /// Agents are removed one at a time through <see cref="UnregisterAgent"/>, so each one has
    /// its group cleared and its disable-event subscription removed.
    /// </remarks>
    public virtual void Dispose()
    {
        // UnregisterAgent mutates m_Agents, so the set is re-queried on every pass rather
        // than enumerated with foreach.
        while (m_Agents.Count > 0)
        {
            UnregisterAgent(m_Agents.First());
        }
    }

    /// <summary>
    /// Registers an agent to this MultiAgentGroup. Does nothing if the agent is already registered.
    /// </summary>
    /// <param name="agent">The agent to register.</param>
    public virtual void RegisterAgent(Agent agent)
    {
        if (!m_Agents.Contains(agent))
        {
            // Assign the group before adding to the set, so that if the assignment throws
            // the agent is not left recorded as a member.
            agent.SetMultiAgentGroup(this);
            m_Agents.Add(agent);
            agent.OnAgentDisabled += UnregisterAgent;
        }
    }

    /// <summary>
    /// Removes an agent from this MultiAgentGroup. Does nothing if the agent is not registered.
    /// </summary>
    /// <param name="agent">The agent to unregister.</param>
    public virtual void UnregisterAgent(Agent agent)
    {
        if (m_Agents.Contains(agent))
        {
            agent.SetMultiAgentGroup(null);
            m_Agents.Remove(agent);
            agent.OnAgentDisabled -= UnregisterAgent;
        }
    }

    /// <summary>
    /// Gets the ID of this MultiAgentGroup.
    /// </summary>
    /// <returns>The group ID assigned when this instance was created.</returns>
    public int GetId()
    {
        return m_Id;
    }

    /// <summary>
    /// Get list of all agents currently registered to this MultiAgentGroup.
    /// </summary>
    /// <returns>
    /// List of agents registered to the MultiAgentGroup. This is a read-only view of the
    /// group's own collection, not a copy, so it reflects later registrations and removals.
    /// </returns>
    public IReadOnlyCollection<Agent> GetRegisteredAgents()
    {
        return (IReadOnlyCollection<Agent>)m_Agents;
    }

    /// <summary>
    /// Increments the group rewards for all agents in this MultiAgentGroup.
    /// </summary>
    /// <remarks>
    /// This function increases or decreases the group rewards by a given amount for all agents
    /// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned
    /// to the current step with a specific value rather than increasing or decreasing it.
    ///
    /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
    /// Every agent in the group will receive the same group reward no matter whether the
    /// agent's act directly leads to the reward. Group rewards are meant to reinforce agents
    /// to act in the group's best interest instead of individual ones.
    /// Group rewards are treated differently than individual agent rewards during training, so
    /// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group.
    /// </remarks>
    /// <param name="reward">Incremental group reward value.</param>
    public void AddGroupReward(float reward)
    {
        foreach (var agent in m_Agents)
        {
            agent.AddGroupReward(reward);
        }
    }

    /// <summary>
    /// Set the group rewards for all agents in this MultiAgentGroup.
    /// </summary>
    /// <remarks>
    /// This function replaces any group rewards given during the current step for all agents in the group.
    /// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than
    /// overriding it.
    ///
    /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
    /// Every agent in the group will receive the same group reward no matter whether the
    /// agent's act directly leads to the reward. Group rewards are meant to reinforce agents
    /// to act in the group's best interest instead of individual ones.
    /// Group rewards are treated differently than individual agent rewards during training, so
    /// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group.
    /// </remarks>
    /// <param name="reward">The new value of the group reward.</param>
    public void SetGroupReward(float reward)
    {
        foreach (var agent in m_Agents)
        {
            agent.SetGroupReward(reward);
        }
    }

    /// <summary>
    /// End episodes for all agents in this MultiAgentGroup.
    /// </summary>
    /// <remarks>
    /// This should be used when the episode can no longer continue, such as when the group
    /// reaches the goal or fails at the task.
    /// </remarks>
    public void EndGroupEpisode()
    {
        // Iterate over a snapshot. If ending an episode causes an agent to be disabled
        // (NOTE(review): depends on Agent.EndEpisode and user callbacks - confirm), the
        // OnAgentDisabled handler removes it from m_Agents, which would invalidate a live
        // enumerator over the set and throw InvalidOperationException.
        foreach (var agent in m_Agents.ToArray())
        {
            // Skip agents that were unregistered while earlier agents were being processed.
            if (m_Agents.Contains(agent))
            {
                agent.EndEpisode();
            }
        }
    }

    /// <summary>
    /// Indicate that the episode is over but not due to the "fault" of the group.
    /// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a
    /// slightly different effect on training.
    /// </summary>
    /// <remarks>
    /// This should be used when the episode could continue, but has gone on for
    /// a sufficient number of steps, such as if the environment hits some maximum number of steps.
    /// </remarks>
    public void GroupEpisodeInterrupted()
    {
        // Iterate over a snapshot for the same reason as EndGroupEpisode: an agent being
        // disabled during the callback would otherwise modify m_Agents mid-enumeration.
        foreach (var agent in m_Agents.ToArray())
        {
            // Skip agents that were unregistered while earlier agents were being processed.
            if (m_Agents.Contains(agent))
            {
                agent.EpisodeInterrupted();
            }
        }
    }
}
}