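// The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables
// games and simulations to serve as environments for training intelligent agents.
// Repository page note: at most 25 topics may be selected; a topic must start with a Chinese
// character, a letter, or a digit, may contain hyphens (-), and must not exceed 35 characters.
// Repository page note: 208 lines, 5.8 KiB.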

using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Extensions.Match3;
namespace Unity.MLAgentsExamples
{
/// <summary>
/// Phase of the "game" when every step of the simulation is shown one at a time.
/// This is only used outside of training.
/// The state diagram is
/// <code>
///     +------------------------------------------+
///     |                                          |
///     v                                          |
/// +--------+      +-------+      +-----+      +------+
/// |Find    | ---> |Clear  | ---> |Drop | ---> |Fill  |
/// |Matches |      |Matched|      |     |      |Empty |
/// +--------+      +-------+      +-----+      +------+
///   |    ^
///   |    |
///   v    |
/// +--------+
/// |Wait for|
/// |Move    |
/// +--------+
/// </code>
/// The state advances once every "MoveTime" seconds.
/// </summary>
internal enum State
{
    /// <summary>
    /// Sentinel that should never be observed as the current state.
    /// </summary>
    Invalid = -1,

    /// <summary>
    /// Search the board for matches. When any are found the next state is ClearMatched;
    /// when none are found the next state is WaitForMove.
    /// </summary>
    FindMatches = 0,

    /// <summary>
    /// Take the matched cells off the board, leaving a placeholder value in their place.
    /// </summary>
    ClearMatched = 1,

    /// <summary>
    /// Shift cells "down" so they occupy the empty space below them.
    /// </summary>
    Drop = 2,

    /// <summary>
    /// Put new random values into the cells that are still empty.
    /// </summary>
    FillEmpty = 3,

    /// <summary>
    /// Ask the Agent for its next move.
    /// </summary>
    WaitForMove = 4,
}
/// <summary>
/// Agent that plays on the <see cref="Match3Board"/> attached to the same GameObject.
/// Each FixedUpdate it either resolves the whole board at once (<see cref="FastUpdate"/>) or
/// advances a step-by-step state machine (<see cref="AnimatedUpdate"/>), depending on
/// whether the Academy communicator is on.
/// </summary>
public class Match3Agent : Agent
{
    /// <summary>
    /// The board this agent plays on. Assigned in <see cref="Awake"/> from the
    /// <see cref="Match3Board"/> component on the same GameObject.
    /// </summary>
    [HideInInspector]
    public Match3Board Board;

    /// <summary>
    /// Seconds between two state transitions in <see cref="AnimatedUpdate"/>.
    /// </summary>
    public float MoveTime = 1.0f;

    /// <summary>
    /// Number of moves after which the episode is interrupted.
    /// </summary>
    public int MaxMoves = 500;

    private State m_CurrentState = State.WaitForMove;
    private float m_TimeUntilMove;
    private int m_MovesMade;

    // Scale applied to the points returned by Board.ClearMatchedCells() before they are added as reward.
    private const float k_RewardMultiplier = 0.01f;

    private void Awake()
    {
        Board = GetComponent<Match3Board>();
    }

    /// <summary>
    /// Resets the board, the state machine, the move timer and the move counter for a new episode.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        base.OnEpisodeBegin();

        Board.UpdateCurrentBoardSize();
        Board.InitSettled();

        m_CurrentState = State.FindMatches;
        m_TimeUntilMove = MoveTime;
        m_MovesMade = 0;
    }

    private void FixedUpdate()
    {
        if (Academy.Instance.IsCommunicatorOn)
        {
            FastUpdate();
        }
        else
        {
            AnimatedUpdate();
        }

        // We can't use the normal MaxSteps system to decide when to end an episode,
        // since different agents will make moves at different frequencies (depending on the number of
        // chained moves). So track a number of moves per Agent and manually interrupt the episode.
        if (m_MovesMade >= MaxMoves)
        {
            EpisodeInterrupted();
        }
    }

    /// <summary>
    /// Resolves all chained matches in a single call, rewarding the agent for each cleared
    /// batch, then requests the next decision and counts it as one move.
    /// </summary>
    private void FastUpdate()
    {
        // Keep clearing, dropping and refilling until the board holds no more matches.
        while (Board.MarkMatchedCells())
        {
            var pointsEarned = Board.ClearMatchedCells();
            AddReward(k_RewardMultiplier * pointsEarned);
            Board.DropCells();
            Board.FillFromAbove();
        }

        ShuffleUntilMoveAvailable();
        RequestDecision();
        m_MovesMade++;
    }

    /// <summary>
    /// Advances the state machine by one state every <see cref="MoveTime"/> seconds so that
    /// each phase of the simulation can be seen individually.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The current state is not one of the handled <see cref="State"/> values
    /// (for example <see cref="State.Invalid"/>).
    /// </exception>
    private void AnimatedUpdate()
    {
        m_TimeUntilMove -= Time.deltaTime;
        if (m_TimeUntilMove > 0.0f)
        {
            // Not yet time for the next transition.
            return;
        }
        m_TimeUntilMove = MoveTime;

        State nextState;
        switch (m_CurrentState)
        {
            case State.FindMatches:
                if (Board.MarkMatchedCells())
                {
                    nextState = State.ClearMatched;
                }
                else
                {
                    // The board has settled: the previous move is complete, so count it.
                    nextState = State.WaitForMove;
                    m_MovesMade++;
                }
                break;

            case State.ClearMatched:
                var pointsEarned = Board.ClearMatchedCells();
                AddReward(k_RewardMultiplier * pointsEarned);
                nextState = State.Drop;
                break;

            case State.Drop:
                Board.DropCells();
                nextState = State.FillEmpty;
                break;

            case State.FillEmpty:
                Board.FillFromAbove();
                nextState = State.FindMatches;
                break;

            case State.WaitForMove:
                ShuffleUntilMoveAvailable();
                RequestDecision();
                nextState = State.FindMatches;
                break;

            default:
                // Report which value was hit so a corrupted state is diagnosable from the log.
                throw new ArgumentOutOfRangeException(
                    nameof(m_CurrentState), m_CurrentState, "Unhandled Match3Agent state.");
        }

        m_CurrentState = nextState;
    }

    /// <summary>
    /// Shuffle the board until we have a valid move: calls Board.InitSettled() repeatedly
    /// for as long as <see cref="HasValidMoves"/> reports none.
    /// </summary>
    private void ShuffleUntilMoveAvailable()
    {
        while (!HasValidMoves())
        {
            Board.InitSettled();
        }
    }

    /// <summary>
    /// Checks whether Board.ValidMoves() yields at least one move.
    /// </summary>
    /// <returns>true if at least one move is yielded; otherwise false.</returns>
    private bool HasValidMoves()
    {
        // Return on the first yielded element; the remaining moves are never enumerated.
        foreach (var unused in Board.ValidMoves())
        {
            return true;
        }

        return false;
    }
}
}