The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.
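The SimpleMultiAgentGroup class below is the toolkit's built-in way to group cooperating agents so that they share group rewards and episode boundaries. As a quick orientation before the source, here is a minimal usage sketch. It assumes an Agent subclass (the hypothetical CooperativePusher) and a controller object that owns the group; those names are illustrative and not part of this file.

using Unity.MLAgents;
using UnityEngine;

// Illustrative sketch only: CooperativePusher is a hypothetical Agent subclass.
public class TeamController : MonoBehaviour
{
    SimpleMultiAgentGroup m_Group;

    void Start()
    {
        m_Group = new SimpleMultiAgentGroup();
        // Register every teammate so they share group rewards and episode boundaries.
        foreach (var agent in GetComponentsInChildren<CooperativePusher>())
        {
            m_Group.RegisterAgent(agent);
        }
    }

    public void OnGoalReached()
    {
        // Reward the whole team, then end everyone's episode together.
        m_Group.SetGroupReward(1.0f);
        m_Group.EndGroupEpisode();
    }

    public void OnTimeLimit()
    {
        // The task merely timed out, so interrupt rather than end the episode.
        m_Group.GroupEpisodeInterrupted();
    }

    void OnDestroy()
    {
        // Dispose unregisters any agents still in the group.
        m_Group?.Dispose();
    }
}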
using System;
using System.Linq;
using System.Collections.Generic;

namespace Unity.MLAgents
{
    /// <summary>
    /// A basic class implementation of MultiAgentGroup.
    /// </summary>
    public class SimpleMultiAgentGroup : IMultiAgentGroup, IDisposable
    {
        readonly int m_Id = MultiAgentGroupIdCounter.GetGroupId();
        HashSet<Agent> m_Agents = new HashSet<Agent>();

        /// <summary>
        /// Disposes of the SimpleMultiAgentGroup.
        /// </summary>
        public virtual void Dispose()
        {
            while (m_Agents.Count > 0)
            {
                UnregisterAgent(m_Agents.First());
            }
        }

        /// <inheritdoc />
        public virtual void RegisterAgent(Agent agent)
        {
            if (!m_Agents.Contains(agent))
            {
                agent.SetMultiAgentGroup(this);
                m_Agents.Add(agent);
                agent.OnAgentDisabled += UnregisterAgent;
            }
        }

        /// <inheritdoc />
        public virtual void UnregisterAgent(Agent agent)
        {
            if (m_Agents.Contains(agent))
            {
                agent.SetMultiAgentGroup(null);
                m_Agents.Remove(agent);
                agent.OnAgentDisabled -= UnregisterAgent;
            }
        }

        /// <inheritdoc />
        public int GetId()
        {
            return m_Id;
        }

        /// <summary>
        /// Get list of all agents currently registered to this MultiAgentGroup.
        /// </summary>
        /// <returns>
        /// List of agents registered to the MultiAgentGroup.
        /// </returns>
        public IReadOnlyCollection<Agent> GetRegisteredAgents()
        {
            return (IReadOnlyCollection<Agent>)m_Agents;
        }

        /// <summary>
        /// Increments the group rewards for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This function increases or decreases the group rewards by a given amount for all agents
        /// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned
        /// to the current step to a specific value rather than increasing or decreasing it.
        ///
        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
        /// Every agent in the group receives the same group reward, regardless of whether that
        /// agent's actions directly led to the reward. Group rewards are meant to encourage agents
        /// to act in the group's best interest instead of their individual interests.
        /// Group rewards are treated differently than individual agent rewards during training, so
        /// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group.
        /// </remarks>
        /// <param name="reward">Incremental group reward value.</param>
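        /// <example>
        /// An illustrative sketch (m_Group is a hypothetical SimpleMultiAgentGroup field):
        /// <code>
        /// // Shape behavior with a small shared bonus each time the team makes progress.
        /// m_Group.AddGroupReward(0.1f);
        /// </code>
        /// </example>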
        public void AddGroupReward(float reward)
        {
            foreach (var agent in m_Agents)
            {
                agent.AddGroupReward(reward);
            }
        }

        /// <summary>
        /// Sets the group reward for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This function replaces any group rewards given during the current step for all agents in the group.
        /// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than
        /// overriding it.
        ///
        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
        /// Every agent in the group receives the same group reward, regardless of whether that
        /// agent's actions directly led to the reward. Group rewards are meant to encourage agents
        /// to act in the group's best interest instead of their individual interests.
        /// Group rewards are treated differently than individual agent rewards during training, so
        /// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group.
        /// </remarks>
        /// <param name="reward">The new value of the group reward.</param>
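        /// <example>
        /// An illustrative sketch (m_Group is a hypothetical SimpleMultiAgentGroup field):
        /// <code>
        /// // Overwrite whatever group reward this step has accumulated so far.
        /// m_Group.SetGroupReward(1.0f);
        /// </code>
        /// </example>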
        public void SetGroupReward(float reward)
        {
            foreach (var agent in m_Agents)
            {
                agent.SetGroupReward(reward);
            }
        }

        /// <summary>
        /// End episodes for all agents in this MultiAgentGroup.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode can no longer continue, such as when the group
        /// reaches the goal or fails at the task.
        /// </remarks>
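        /// <example>
        /// An illustrative sketch (m_Group is a hypothetical SimpleMultiAgentGroup field):
        /// <code>
        /// // The group completed the task: reward it and close the episode for every member.
        /// m_Group.SetGroupReward(1.0f);
        /// m_Group.EndGroupEpisode();
        /// </code>
        /// </example>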
        public void EndGroupEpisode()
        {
            foreach (var agent in m_Agents)
            {
                agent.EndEpisode();
            }
        }

        /// <summary>
        /// Indicate that the episode is over but not due to the "fault" of the group.
        /// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a
        /// slightly different effect on training.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode could continue, but has gone on for
        /// a sufficient number of steps, such as if the environment hits some maximum number of steps.
        /// </remarks>
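        /// <example>
        /// An illustrative sketch (m_Group, m_StepCount, and m_MaxSteps are hypothetical fields):
        /// <code>
        /// // Time ran out without success or failure, so interrupt instead of ending.
        /// if (m_StepCount >= m_MaxSteps)
        /// {
        ///     m_Group.GroupEpisodeInterrupted();
        /// }
        /// </code>
        /// </example>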
        public void GroupEpisodeInterrupted()
        {
            foreach (var agent in m_Agents)
            {
                agent.EpisodeInterrupted();
            }
        }
    }
}