// Put this script on your blue cube.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class PushAgentBasic : Agent
{
    /// <summary>
    /// The ground. The bounds are used to spawn the elements.
    /// </summary>
    public GameObject ground;

    public GameObject area;

    /// <summary>
    /// The area bounds.
    /// </summary>
    [HideInInspector]
    public Bounds areaBounds;

    PushBlockAcademy academy;

    /// <summary>
    /// The goal to push the block to.
    /// </summary>
    public GameObject goal;

    /// <summary>
    /// The block to be pushed to the goal.
    /// </summary>
    public GameObject block;

    /// <summary>
    /// Detects when the block touches the goal.
    /// </summary>
    [HideInInspector]
    public GoalDetect goalDetect;

    Rigidbody blockRB;       // cached on initialization
    Rigidbody agentRB;       // cached on initialization
    Material groundMaterial; // cached on Awake()

    /// <summary>
    /// We will be changing the ground material based on success/failure.
    /// </summary>
    Renderer groundRenderer;

    void Awake()
    {
        // There is one brain in the scene, so this should find our brain.
        brain = FindObjectOfType<Brain>();
        academy = FindObjectOfType<PushBlockAcademy>(); // cache the academy
    }

    public override void InitializeAgent()
    {
        base.InitializeAgent();
        goalDetect = block.GetComponent<GoalDetect>();
        goalDetect.agent = this;

        // Cache the agent rigidbody.
        agentRB = GetComponent<Rigidbody>();
        // Cache the block rigidbody.
        blockRB = block.GetComponent<Rigidbody>();
        // Get the ground's bounds.
        areaBounds = ground.GetComponent<Collider>().bounds;
        // Get the ground renderer so we can change the material when a goal is scored.
        groundRenderer = ground.GetComponent<Renderer>();
        // Starting material.
        groundMaterial = groundRenderer.material;
    }

    public override void CollectObservations()
    {
        // Block position relative to goal.
        Vector3 blockPosRelToGoal = blockRB.position - goal.transform.position;

        // Block position relative to agent.
        Vector3 blockPosRelToAgent = blockRB.position - agentRB.position;

        // Agent position relative to ground.
        Vector3 agentPos = agentRB.position - area.transform.position;

        // Goal position relative to ground.
        Vector3 goalPos = goal.transform.position - ground.transform.position;

        AddVectorObs(agentPos);
        AddVectorObs(goalPos);
        AddVectorObs(blockPosRelToGoal);
        AddVectorObs(blockPosRelToAgent);

        // Add velocity of block and agent to observations.
        AddVectorObs(blockRB.velocity);
        AddVectorObs(agentRB.velocity);
    }

    /// <summary>
    /// Use the ground's bounds to pick a random spawn position.
    /// </summary>
    public Vector3 GetRandomSpawnPos(float spawnHeight)
    {
        bool foundNewSpawnLocation = false;
        Vector3 randomSpawnPos = Vector3.zero;
        while (foundNewSpawnLocation == false)
        {
            float randomPosX = Random.Range(-areaBounds.extents.x * academy.spawnAreaMarginMultiplier,
                                            areaBounds.extents.x * academy.spawnAreaMarginMultiplier);
            float randomPosZ = Random.Range(-areaBounds.extents.z * academy.spawnAreaMarginMultiplier,
                                            areaBounds.extents.z * academy.spawnAreaMarginMultiplier);
            randomSpawnPos = ground.transform.position + new Vector3(randomPosX, spawnHeight, randomPosZ);

            // Only accept the spot if nothing else is already occupying it.
            if (Physics.CheckBox(randomSpawnPos, new Vector3(2.5f, 0.01f, 2.5f)) == false)
            {
                foundNewSpawnLocation = true;
            }
        }
        return randomSpawnPos;
    }

    /// <summary>
    /// Called when the agent moves the block into the goal.
    /// </summary>
    public void IScoredAGoal()
    {
        // We use a reward of 5.
        AddReward(5f);

        // By marking an agent as done, AgentReset() will be called automatically.
        Done();

        // Swap ground material for a bit to indicate we scored.
        StartCoroutine(GoalScoredSwapGroundMaterial(academy.goalScoredMaterial, 1f));
    }
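    // IScoredAGoal() is called by the GoalDetect script that sits on the block.
    // GoalDetect isn't shown in this listing; a minimal sketch of what it might
    // look like (assuming the goal object is tagged "goal") is:
    //
    //   public class GoalDetect : MonoBehaviour
    //   {
    //       [HideInInspector]
    //       public PushAgentBasic agent; // set by the agent in InitializeAgent()
    //
    //       void OnCollisionEnter(Collision col)
    //       {
    //           // The block just touched the goal, so tell the agent it scored.
    //           if (col.gameObject.CompareTag("goal"))
    //           {
    //               agent.IScoredAGoal();
    //           }
    //       }
    //   }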
    /// <summary>
    /// Swap ground material, wait time seconds, then swap back to the regular material.
    /// </summary>
    IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
    {
        groundRenderer.material = mat;
        yield return new WaitForSeconds(time); // wait for time seconds
        groundRenderer.material = groundMaterial;
    }

    /// <summary>
    /// Moves the agent according to the selected action.
    /// </summary>
    public void MoveAgent(float[] act)
    {
        // AGENT ACTIONS
        // Here we define the actions our agent can use, such as
        // "go left", "go forward", "turn", etc.
        // In the brain we define the number of axes we want to use here.
        // In this example we need 2 axes:
        //   Right/left movement (act[0])
        //   Forward/back movement (act[1])

        // Example: right/left movement is defined by this line:
        //   Vector3 directionX = Vector3.right * Mathf.Clamp(act[0], -1f, 1f);
        // The neural network sets the act[0] value.
        // If it chooses 1, the agent will go right.
        // If it chooses -1, the agent will go left.
        // If it chooses .42, it will go a little bit right.
        // If it chooses -.8, it will go left (well... 80% left).

        // Energy conservation penalties:
        // Give penalties based on how fast the agent chooses to go.
        // The agent should only exert as much energy as necessary.
        // This is how animals work as well,
        // i.e. you're probably not running in place at all times.

        // The larger this value, the smaller the penalty.
        float energyConservPenaltyModifier = 10000f;

        // The larger the movement, the greater the penalty given.
        AddReward(-Mathf.Abs(act[0]) / energyConservPenaltyModifier);
        AddReward(-Mathf.Abs(act[1]) / energyConservPenaltyModifier);

        // Move left or right in world space.
        Vector3 directionX = Vector3.right * Mathf.Clamp(act[0], -1f, 1f);

        // Move forward or back in world space.
        Vector3 directionZ = Vector3.forward * Mathf.Clamp(act[1], -1f, 1f);

        // Add the directions together. This is the direction we want the agent
        // to move in.
        Vector3 dirToGo = directionX + directionZ;

        // Apply the movement force!
        agentRB.AddForce(dirToGo * academy.agentRunSpeed, ForceMode.VelocityChange);

        if (dirToGo != Vector3.zero)
        {
            // Rotate the agent to face the direction it is moving in.
            agentRB.rotation = Quaternion.Lerp(agentRB.rotation,
                                               Quaternion.LookRotation(dirToGo),
                                               Time.deltaTime * academy.agentRotationSpeed);
        }
    }

    /// <summary>
    /// Called every step of the engine. Here the agent takes an action.
    /// </summary>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // Move the agent using the action.
        MoveAgent(vectorAction);

        // Small penalty given each step to encourage the agent to finish the task quickly.
        AddReward(-.00005f);

        // Did the agent or the block get pushed off the edge?
        bool fail = false;

        // If the agent has gone over the edge, end the episode.
        if (!Physics.Raycast(agentRB.position, Vector3.down, 3))
        {
            // Fell off bro.
            fail = true;

            // BAD AGENT
            SetReward(-1f);

            // If we mark an agent as done it will be reset automatically.
            // AgentReset() will be called.
            Done();
        }

        // If the block has gone over the edge, end the episode.
        if (!Physics.Raycast(blockRB.position, Vector3.down, 3))
        {
            // Fell off bro.
            fail = true;

            // BAD AGENT
            SetReward(-1f);

            // If we mark an agent as done it will be reset automatically.
            // AgentReset() will be called.
            Done();
        }

        if (fail)
        {
            // Swap the ground material to indicate the episode failed.
            StartCoroutine(GoalScoredSwapGroundMaterial(academy.failMaterial, 1f));
        }
    }
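    // This script reads its tuning values off a PushBlockAcademy component in
    // the scene: spawnAreaMarginMultiplier, agentRunSpeed, agentRotationSpeed,
    // goalScoredMaterial, and failMaterial. That class isn't shown in this
    // listing; a minimal sketch of the fields this script assumes (the default
    // values here are placeholders, not tuned numbers) is:
    //
    //   public class PushBlockAcademy : Academy
    //   {
    //       public float agentRunSpeed = 2f;               // force multiplier in MoveAgent()
    //       public float agentRotationSpeed = 15f;         // turn speed used in the rotation Lerp
    //       public float spawnAreaMarginMultiplier = 0.9f; // shrinks the spawn area away from the edges
    //       public Material goalScoredMaterial;            // ground flashes this on success
    //       public Material failMaterial;                  // ground flashes this on failure
    //   }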
    /// <summary>
    /// Resets the block position and velocities.
    /// </summary>
    void ResetBlock()
    {
        // Get a random position for the block.
        block.transform.position = GetRandomSpawnPos(1.5f);

        // Reset block velocity back to zero.
        blockRB.velocity = Vector3.zero;

        // Reset block angular velocity back to zero.
        blockRB.angularVelocity = Vector3.zero;
    }

    /// <summary>
    /// In the editor, if "Reset On Done" is checked then AgentReset() will be
    /// called automatically anytime we mark done = true in an agent script.
    /// </summary>
    public override void AgentReset()
    {
        // Rotate the whole area by a random multiple of 90 degrees.
        int rotation = Random.Range(0, 4);
        float rotationAngle = rotation * 90f;
        area.transform.Rotate(new Vector3(0f, rotationAngle, 0f));

        ResetBlock();
        transform.position = GetRandomSpawnPos(1.5f);
        agentRB.velocity = Vector3.zero;
        agentRB.angularVelocity = Vector3.zero;
    }
}
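// Brain setup: CollectObservations() adds six Vector3 observations, and each
// AddVectorObs(Vector3) contributes 3 floats, so the Brain's Vector
// Observation Space Size should be 6 * 3 = 18. MoveAgent() reads act[0] and
// act[1], so the continuous Vector Action Space Size should be 2. These
// numbers follow from the code above; double-check them against your own
// Brain settings if you've changed the observations or actions.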