Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

110 行
2.9 KiB

using UnityEngine;
using MLAgents;
/// <summary>
/// An example of how to use ML-Agents without inheriting from the Agent class.
/// Observations are generated by the attached SensorComponent, and the actions
/// are retrieved from the Agent.
/// </summary>
public class BasicController : MonoBehaviour
{
    // Seconds to wait between decision requests when no trainer is connected.
    public float timeBetweenDecisionsAtInference;
    float m_TimeSinceDecision;

    // Current cell of the agent on the 1-D track; hidden because it is driven by code.
    [HideInInspector]
    public int m_Position;

    const int k_SmallGoalPosition = 7;
    const int k_LargeGoalPosition = 17;
    public GameObject largeGoal;
    public GameObject smallGoal;
    const int k_MinPosition = 0;
    const int k_MaxPosition = 20;
    public const int k_Extents = k_MaxPosition - k_MinPosition;

    Agent m_Agent;

    public void OnEnable()
    {
        m_Agent = GetComponent<Agent>();
        ResetAgent();
    }

    /// <summary>
    /// Moves the agent one cell based on the received action, clamps it to the
    /// track, applies the step penalty, and ends the episode on a goal cell.
    /// </summary>
    /// <param name="vectorAction">Discrete action: 1 = move left, 2 = move right,
    /// any other value = stand still.</param>
    public void ApplyAction(float[] vectorAction)
    {
        var movement = (int)vectorAction[0];
        var direction = movement == 1 ? -1 : movement == 2 ? 1 : 0;

        m_Position = Mathf.Clamp(m_Position + direction, k_MinPosition, k_MaxPosition);
        gameObject.transform.position = new Vector3(m_Position - 10f, 0f, 0f);

        // Small per-step penalty so the agent is encouraged to reach a goal quickly.
        m_Agent.AddReward(-0.01f);

        if (m_Position == k_SmallGoalPosition)
        {
            m_Agent.AddReward(0.1f);
            m_Agent.EndEpisode();
            ResetAgent();
        }
        if (m_Position == k_LargeGoalPosition)
        {
            m_Agent.AddReward(1f);
            m_Agent.EndEpisode();
            ResetAgent();
        }
    }

    /// <summary>
    /// Puts the agent back in the middle of the track and repositions both goals.
    /// </summary>
    public void ResetAgent()
    {
        m_Position = 10;
        smallGoal.transform.position = new Vector3(k_SmallGoalPosition - 10f, 0f, 0f);
        largeGoal.transform.position = new Vector3(k_LargeGoalPosition - 10f, 0f, 0f);
    }

    public void FixedUpdate()
    {
        WaitTimeInference();
    }

    /// <summary>
    /// Steps the agent every physics tick while a trainer is connected; otherwise
    /// throttles decisions to at most one per timeBetweenDecisionsAtInference seconds.
    /// </summary>
    void WaitTimeInference()
    {
        if (Academy.Instance.IsCommunicatorOn)
        {
            // Apply the previous step's actions, then immediately ask for the next decision.
            ApplyAction(m_Agent.GetAction());
            m_Agent.RequestDecision();
            return;
        }

        if (m_TimeSinceDecision < timeBetweenDecisionsAtInference)
        {
            m_TimeSinceDecision += Time.fixedDeltaTime;
            return;
        }

        // Enough time has elapsed: apply the previous step's actions and request again.
        ApplyAction(m_Agent.GetAction());
        m_TimeSinceDecision = 0f;
        m_Agent.RequestDecision();
    }
}