Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

269 行
7.9 KiB

//Put this script on your blue cube.
using System.Collections;
using UnityEngine;
using MLAgents;
public class PushAgentBasic : Agent
{
/// <summary>
/// The ground. The bounds are used to spawn the elements.
/// </summary>
public GameObject ground;
public GameObject area;
/// <summary>
/// The area bounds.
/// </summary>
[HideInInspector]
public Bounds areaBounds;
PushBlockAcademy m_Academy;
/// <summary>
/// The goal to push the block to.
/// </summary>
public GameObject goal;
/// <summary>
/// The block to be pushed to the goal.
/// </summary>
public GameObject block;
/// <summary>
/// Detects when the block touches the goal.
/// </summary>
[HideInInspector]
public GoalDetect goalDetect;
public bool useVectorObs;
Rigidbody m_BlockRb; //cached on initialization
Rigidbody m_AgentRb; //cached on initialization
Material m_GroundMaterial; //cached on Awake()
RayPerception m_RayPer;
float[] m_RayAngles = { 0f, 45f, 90f, 135f, 180f, 110f, 70f };
string[] m_DetectableObjects = { "block", "goal", "wall" };
/// <summary>
/// We will be changing the ground material based on success/failue
/// </summary>
Renderer m_GroundRenderer;
void Awake()
{
m_Academy = FindObjectOfType<PushBlockAcademy>(); //cache the academy
}
public override void InitializeAgent()
{
base.InitializeAgent();
goalDetect = block.GetComponent<GoalDetect>();
goalDetect.agent = this;
m_RayPer = GetComponent<RayPerception>();
// Cache the agent rigidbody
m_AgentRb = GetComponent<Rigidbody>();
// Cache the block rigidbody
m_BlockRb = block.GetComponent<Rigidbody>();
// Get the ground's bounds
areaBounds = ground.GetComponent<Collider>().bounds;
// Get the ground renderer so we can change the material when a goal is scored
m_GroundRenderer = ground.GetComponent<Renderer>();
// Starting material
m_GroundMaterial = m_GroundRenderer.material;
SetResetParameters();
}
public override void CollectObservations()
{
if (useVectorObs)
{
var rayDistance = 12f;
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects, 1.5f, 0f));
}
}
/// <summary>
/// Use the ground's bounds to pick a random spawn position.
/// </summary>
public Vector3 GetRandomSpawnPos()
{
var foundNewSpawnLocation = false;
var randomSpawnPos = Vector3.zero;
while (foundNewSpawnLocation == false)
{
var randomPosX = Random.Range(-areaBounds.extents.x * m_Academy.spawnAreaMarginMultiplier,
areaBounds.extents.x * m_Academy.spawnAreaMarginMultiplier);
var randomPosZ = Random.Range(-areaBounds.extents.z * m_Academy.spawnAreaMarginMultiplier,
areaBounds.extents.z * m_Academy.spawnAreaMarginMultiplier);
randomSpawnPos = ground.transform.position + new Vector3(randomPosX, 1f, randomPosZ);
if (Physics.CheckBox(randomSpawnPos, new Vector3(2.5f, 0.01f, 2.5f)) == false)
{
foundNewSpawnLocation = true;
}
}
return randomSpawnPos;
}
/// <summary>
/// Called when the agent moves the block into the goal.
/// </summary>
public void ScoredAGoal()
{
// We use a reward of 5.
AddReward(5f);
// By marking an agent as done AgentReset() will be called automatically.
Done();
// Swap ground material for a bit to indicate we scored.
StartCoroutine(GoalScoredSwapGroundMaterial(m_Academy.goalScoredMaterial, 0.5f));
}
/// <summary>
/// Swap ground material, wait time seconds, then swap back to the regular material.
/// </summary>
IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{
m_GroundRenderer.material = mat;
yield return new WaitForSeconds(time); // Wait for 2 sec
m_GroundRenderer.material = m_GroundMaterial;
}
/// <summary>
/// Moves the agent according to the selected action.
/// </summary>
public void MoveAgent(float[] act)
{
var dirToGo = Vector3.zero;
var rotateDir = Vector3.zero;
var action = Mathf.FloorToInt(act[0]);
// Goalies and Strikers have slightly different action spaces.
switch (action)
{
case 1:
dirToGo = transform.forward * 1f;
break;
case 2:
dirToGo = transform.forward * -1f;
break;
case 3:
rotateDir = transform.up * 1f;
break;
case 4:
rotateDir = transform.up * -1f;
break;
case 5:
dirToGo = transform.right * -0.75f;
break;
case 6:
dirToGo = transform.right * 0.75f;
break;
}
transform.Rotate(rotateDir, Time.fixedDeltaTime * 200f);
m_AgentRb.AddForce(dirToGo * m_Academy.agentRunSpeed,
ForceMode.VelocityChange);
}
/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void AgentAction(float[] vectorAction, string textAction)
{
// Move the agent using the action.
MoveAgent(vectorAction);
// Penalty given each step to encourage agent to finish task quickly.
AddReward(-1f / agentParameters.maxStep);
}
public override float[] Heuristic()
{
if (Input.GetKey(KeyCode.D))
{
return new float[] { 3 };
}
if (Input.GetKey(KeyCode.W))
{
return new float[] { 1 };
}
if (Input.GetKey(KeyCode.A))
{
return new float[] { 4 };
}
if (Input.GetKey(KeyCode.S))
{
return new float[] { 2 };
}
return new float[] { 0 };
}
/// <summary>
/// Resets the block position and velocities.
/// </summary>
void ResetBlock()
{
// Get a random position for the block.
block.transform.position = GetRandomSpawnPos();
// Reset block velocity back to zero.
m_BlockRb.velocity = Vector3.zero;
// Reset block angularVelocity back to zero.
m_BlockRb.angularVelocity = Vector3.zero;
}
/// <summary>
/// In the editor, if "Reset On Done" is checked then AgentReset() will be
/// called automatically anytime we mark done = true in an agent script.
/// </summary>
public override void AgentReset()
{
var rotation = Random.Range(0, 4);
var rotationAngle = rotation * 90f;
area.transform.Rotate(new Vector3(0f, rotationAngle, 0f));
ResetBlock();
transform.position = GetRandomSpawnPos();
m_AgentRb.velocity = Vector3.zero;
m_AgentRb.angularVelocity = Vector3.zero;
SetResetParameters();
}
public void SetGroundMaterialFriction()
{
var resetParams = m_Academy.resetParameters;
var groundCollider = ground.GetComponent<Collider>();
groundCollider.material.dynamicFriction = resetParams["dynamic_friction"];
groundCollider.material.staticFriction = resetParams["static_friction"];
}
public void SetBlockProperties()
{
var resetParams = m_Academy.resetParameters;
//Set the scale of the block
m_BlockRb.transform.localScale = new Vector3(resetParams["block_scale"], 0.75f, resetParams["block_scale"]);
// Set the drag of the block
m_BlockRb.drag = resetParams["block_drag"];
}
public void SetResetParameters()
{
SetGroundMaterialFriction();
SetBlockProperties();
}
}