using System; using UnityEngine; using Unity.MLAgents; using Unity.MLAgents.Actuators; using Unity.MLAgents.Extensions.Match3; namespace Unity.MLAgentsExamples { /// /// State of the "game" when showing all steps of the simulation. This is only used outside of training. /// The state diagram is /// /// | <--------------------------------------- ^ /// | | /// v | /// +--------+ +-------+ +-----+ +------+ /// |Find | ---> |Clear | ---> |Drop | ---> |Fill | /// |Matches | |Matched| | | |Empty | /// +--------+ +-------+ +-----+ +------+ /// /// | ^ /// | | /// v | /// /// +--------+ /// |Wait for| /// |Move | /// +--------+ /// /// The stats advances each "MoveTime" seconds. /// enum State { /// /// Guard value, should never happen. /// Invalid = -1, /// /// Look for matches. If there are matches, the next state is ClearMatched, otherwise WaitForMove. /// FindMatches = 0, /// /// Remove matched cells and replace them with a placeholder value. /// ClearMatched = 1, /// /// Move cells "down" to fill empty space. /// Drop = 2, /// /// Replace empty cells with new random values. /// FillEmpty = 3, /// /// Request a move from the Agent. /// WaitForMove = 4, } public class Match3Agent : Agent { [HideInInspector] public Match3Board Board; public float MoveTime = 1.0f; public int MaxMoves = 500; State m_CurrentState = State.WaitForMove; float m_TimeUntilMove; private int m_MovesMade; private const float k_RewardMultiplier = 0.01f; void Awake() { Board = GetComponent(); } public override void OnEpisodeBegin() { base.OnEpisodeBegin(); Board.InitSettled(); m_CurrentState = State.FindMatches; m_TimeUntilMove = MoveTime; m_MovesMade = 0; } private void FixedUpdate() { if (Academy.Instance.IsCommunicatorOn) { FastUpdate(); } else { AnimatedUpdate(); } // We can't use the normal MaxSteps system to decide when to end an episode, // since different agents will make moves at different frequencies (depending on the number of // chained moves). So track a number of moves per Agent and manually interrupt the episode. if (m_MovesMade >= MaxMoves) { EpisodeInterrupted(); } } void FastUpdate() { while (true) { var hasMatched = Board.MarkMatchedCells(); if (!hasMatched) { break; } var pointsEarned = Board.ClearMatchedCells(); AddReward(k_RewardMultiplier * pointsEarned); Board.DropCells(); Board.FillFromAbove(); } while (!HasValidMoves()) { // Shuffle the board until we have a valid move. Board.InitSettled(); } RequestDecision(); m_MovesMade++; } void AnimatedUpdate() { m_TimeUntilMove -= Time.deltaTime; if (m_TimeUntilMove > 0.0f) { return; } m_TimeUntilMove = MoveTime; State nextState; switch (m_CurrentState) { case State.FindMatches: var hasMatched = Board.MarkMatchedCells(); nextState = hasMatched ? State.ClearMatched : State.WaitForMove; if (nextState == State.WaitForMove) { m_MovesMade++; } break; case State.ClearMatched: var pointsEarned = Board.ClearMatchedCells(); AddReward(k_RewardMultiplier * pointsEarned); nextState = State.Drop; break; case State.Drop: Board.DropCells(); nextState = State.FillEmpty; break; case State.FillEmpty: Board.FillFromAbove(); nextState = State.FindMatches; break; case State.WaitForMove: while (true) { // Shuffle the board until we have a valid move. bool hasMoves = HasValidMoves(); if (hasMoves) { break; } Board.InitSettled(); } RequestDecision(); nextState = State.FindMatches; break; default: throw new ArgumentOutOfRangeException(); } m_CurrentState = nextState; } bool HasValidMoves() { foreach (var unused in Board.ValidMoves()) { return true; } return false; } } }