The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.
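The script below, PushAgentBasic.cs, is the agent from the PushBlock example environment: a cube agent learns to push a block onto a goal area. It observes relative positions and velocities, acts through continuous movement forces, and is rewarded for scoring while being penalized for wasted energy and for falling off the platform.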

// Put this script on your blue cube.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class PushAgentBasic : Agent
{
    /// <summary>
    /// The ground. The bounds are used to spawn the elements.
    /// </summary>
    public GameObject ground;

    public GameObject area;

    /// <summary>
    /// The area bounds.
    /// </summary>
    [HideInInspector]
    public Bounds areaBounds;

    PushBlockAcademy academy;

    /// <summary>
    /// The goal to push the block to.
    /// </summary>
    public GameObject goal;

    /// <summary>
    /// The block to be pushed to the goal.
    /// </summary>
    public GameObject block;

    /// <summary>
    /// Detects when the block touches the goal.
    /// </summary>
    [HideInInspector]
    public GoalDetect goalDetect;

    Rigidbody blockRB;       // cached on initialization
    Rigidbody agentRB;       // cached on initialization
    Material groundMaterial; // cached on Awake()

    /// <summary>
    /// We will be changing the ground material based on success/failure.
    /// </summary>
    Renderer groundRenderer;
    void Awake()
    {
        // There is one brain in the scene so this should find our brain.
        brain = FindObjectOfType<Brain>();
        academy = FindObjectOfType<PushBlockAcademy>(); // cache the academy
    }
    public override void InitializeAgent()
    {
        base.InitializeAgent();
        goalDetect = block.GetComponent<GoalDetect>();
        goalDetect.agent = this;

        // Cache the agent rigidbody
        agentRB = GetComponent<Rigidbody>();
        // Cache the block rigidbody
        blockRB = block.GetComponent<Rigidbody>();
        // Get the ground's bounds
        areaBounds = ground.GetComponent<Collider>().bounds;
        // Get the ground renderer so we can change the material when a goal is scored
        groundRenderer = ground.GetComponent<Renderer>();
        // Starting material
        groundMaterial = groundRenderer.material;
    }
    public override void CollectObservations()
    {
        // Block position relative to goal.
        Vector3 blockPosRelToGoal = blockRB.position - goal.transform.position;
        // Block position relative to agent.
        Vector3 blockPosRelToAgent = blockRB.position - agentRB.position;
        // Agent position relative to the area.
        Vector3 agentPos = agentRB.position - area.transform.position;
        // Goal position relative to the ground.
        Vector3 goalPos = goal.transform.position - ground.transform.position;

        AddVectorObs(agentPos);
        AddVectorObs(goalPos);
        AddVectorObs(blockPosRelToGoal);
        AddVectorObs(blockPosRelToAgent);
        // Add velocity of block and agent to observations.
        AddVectorObs(blockRB.velocity);
        AddVectorObs(agentRB.velocity);
    }
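    // Note: the six AddVectorObs(Vector3) calls above flatten to 18 floats,
    // so the Brain's vector observation space size must be set to 18 to match.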
    /// <summary>
    /// Use the ground's bounds to pick a random spawn position.
    /// </summary>
    public Vector3 GetRandomSpawnPos(float spawnHeight)
    {
        bool foundNewSpawnLocation = false;
        Vector3 randomSpawnPos = Vector3.zero;
        while (foundNewSpawnLocation == false)
        {
            float randomPosX = Random.Range(-areaBounds.extents.x * academy.spawnAreaMarginMultiplier,
                                            areaBounds.extents.x * academy.spawnAreaMarginMultiplier);
            float randomPosZ = Random.Range(-areaBounds.extents.z * academy.spawnAreaMarginMultiplier,
                                            areaBounds.extents.z * academy.spawnAreaMarginMultiplier);
            randomSpawnPos = ground.transform.position + new Vector3(randomPosX, spawnHeight, randomPosZ);
            // Only accept the position if nothing already occupies it.
            if (Physics.CheckBox(randomSpawnPos, new Vector3(2.5f, 0.01f, 2.5f)) == false)
            {
                foundNewSpawnLocation = true;
            }
        }
        return randomSpawnPos;
    }
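    // Note: the loop above is rejection sampling. It keeps drawing positions
    // inside the margin-scaled ground bounds until Physics.CheckBox reports a
    // clear 5 x 5 footprint (half-extents 2.5 x 0.01 x 2.5). There is no retry
    // cap, so an overly crowded area could make this loop spin for a while.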
    /// <summary>
    /// Called when the agent moves the block into the goal.
    /// </summary>
    public void IScoredAGoal()
    {
        // We use a reward of 5.
        AddReward(5f);
        // By marking an agent as done, AgentReset() will be called automatically.
        Done();
        // Swap ground material for a bit to indicate we scored.
        StartCoroutine(GoalScoredSwapGroundMaterial(academy.goalScoredMaterial, 1f));
    }
    /// <summary>
    /// Swap the ground material, wait for the given time, then swap back to the regular material.
    /// </summary>
    IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
    {
        groundRenderer.material = mat;
        yield return new WaitForSeconds(time); // wait for the requested duration
        groundRenderer.material = groundMaterial;
    }
    /// <summary>
    /// Moves the agent according to the selected action.
    /// </summary>
    public void MoveAgent(float[] act)
    {
        // AGENT ACTIONS
        // Here we define the actions our agent can use, such as
        // "go left", "go forward", "turn", etc.
        // In the brain we define the number of axes we want to use here.
        // In this example we need 2 axes:
        //   Right/left movement (act[0])
        //   Forward/back movement (act[1])
        // Example: right/left movement is defined by this line:
        //   Vector3 directionX = Vector3.right * Mathf.Clamp(act[0], -1f, 1f);
        // The neural network sets the act[0] value.
        // If it chooses 1, the agent goes right at full speed;
        // if it chooses -1, the agent goes left at full speed;
        // if it chooses 0.42, it goes right at 42% speed;
        // if it chooses -0.8, it goes left at 80% speed.

        // Energy conservation penalties:
        // Give penalties based on how fast the agent chooses to go.
        // The agent should only exert as much energy as necessary,
        // much like animals do (you're probably not running in place at all times).
        // The larger this value, the smaller the penalty.
        float energyConservPenaltyModifier = 10000f;
        // The larger the movement, the greater the penalty given.
        AddReward(-Mathf.Abs(act[0]) / energyConservPenaltyModifier);
        AddReward(-Mathf.Abs(act[1]) / energyConservPenaltyModifier);

        // Move left/right and forward/back in world space.
        Vector3 directionX = Vector3.right * Mathf.Clamp(act[0], -1f, 1f);
        Vector3 directionZ = Vector3.forward * Mathf.Clamp(act[1], -1f, 1f);

        // Add the directions together. This is the direction we want the agent to move in.
        Vector3 dirToGo = directionX + directionZ;

        // Apply movement force!
        agentRB.AddForce(dirToGo * academy.agentRunSpeed, ForceMode.VelocityChange);

        if (dirToGo != Vector3.zero)
        {
            // Rotate the agent to face the movement direction.
            agentRB.rotation = Quaternion.Lerp(agentRB.rotation,
                                               Quaternion.LookRotation(dirToGo),
                                               Time.deltaTime * academy.agentRotationSpeed);
        }
    }
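    // Note: MoveAgent() reads act[0] and act[1] and clamps each to [-1, 1],
    // so the Brain must use a continuous vector action space of size 2.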
    /// <summary>
    /// Called every step of the engine. Here the agent takes an action.
    /// </summary>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // Move the agent using the action.
        MoveAgent(vectorAction);

        // Penalty given each step to encourage the agent to finish the task quickly.
        AddReward(-0.00005f);

        // Did the agent or block get pushed off the edge?
        bool fail = false;

        // If the agent has gone over the edge, end the episode.
        if (!Physics.Raycast(agentRB.position, Vector3.down, 3f))
        {
            // Fell off bro
            fail = true;
            // BAD AGENT
            SetReward(-1f);
            // If we mark an agent as done it will be reset automatically.
            // AgentReset() will be called.
            Done();
        }

        // If the block has gone over the edge, end the episode.
        if (!Physics.Raycast(blockRB.position, Vector3.down, 3f))
        {
            // Fell off bro
            fail = true;
            // BAD AGENT
            SetReward(-1f);
            // If we mark an agent as done it will be reset automatically.
            // AgentReset() will be called.
            Done();
        }

        if (fail)
        {
            // Swap ground material to indicate failure of the episode.
            StartCoroutine(GoalScoredSwapGroundMaterial(academy.failMaterial, 1f));
        }
    }
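    // Note on the reward calls above: AddReward() accumulates reward within the
    // step, while SetReward(-1f) replaces whatever has accumulated so far, so a
    // fall always scores exactly -1 for that step.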
    /// <summary>
    /// Resets the block position and velocities.
    /// </summary>
    void ResetBlock()
    {
        // Get a random position for the block.
        block.transform.position = GetRandomSpawnPos(1.5f);
        // Reset block velocity back to zero.
        blockRB.velocity = Vector3.zero;
        // Reset block angular velocity back to zero.
        blockRB.angularVelocity = Vector3.zero;
    }
    /// <summary>
    /// In the editor, if "Reset On Done" is checked then AgentReset() will be
    /// called automatically anytime we mark done = true in an agent script.
    /// </summary>
    public override void AgentReset()
    {
        // Rotate the whole area by a random multiple of 90 degrees.
        int rotation = Random.Range(0, 4);
        float rotationAngle = rotation * 90f;
        area.transform.Rotate(new Vector3(0f, rotationAngle, 0f));

        ResetBlock();
        transform.position = GetRandomSpawnPos(1.5f);
        agentRB.velocity = Vector3.zero;
        agentRB.angularVelocity = Vector3.zero;
    }
}
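
IScoredAGoal() is not called by the agent itself; it is invoked by the GoalDetect component that InitializeAgent() wires up on the block. That component is not shown on this page, so here is a minimal sketch of what it needs to do; the "goal" tag name is an assumption and should match however the goal object is tagged in the scene.

// Put this script on the block. Notifies the agent when the block hits the goal.
using UnityEngine;

public class GoalDetect : MonoBehaviour
{
    [HideInInspector]
    public PushAgentBasic agent; // assigned by PushAgentBasic.InitializeAgent()

    void OnCollisionEnter(Collision col)
    {
        // Tag check is an assumption; use whatever tag the goal object carries.
        if (col.gameObject.CompareTag("goal"))
        {
            agent.IScoredAGoal();
        }
    }
}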
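The agent also reads its tunables from a PushBlockAcademy component, which is likewise not shown here. From the references in the script above, it must expose at least the following fields; the default values below are illustrative assumptions, not the shipped settings.

using UnityEngine;

public class PushBlockAcademy : Academy
{
    // Force multiplier applied in MoveAgent(); value is an assumption.
    public float agentRunSpeed = 2f;
    // Lerp speed used when rotating toward the move direction; value is an assumption.
    public float agentRotationSpeed = 15f;
    // Fraction of the ground's extents used when sampling spawn positions; value is an assumption.
    public float spawnAreaMarginMultiplier = 0.5f;
    // Materials flashed onto the ground on success and failure.
    public Material goalScoredMaterial;
    public Material failMaterial;
}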