浏览代码

remove commented code, replaced the action bars with action hist in drone, added the drone and spider to the examples

/tag-0.2.0
vincentpierre 7 年前
当前提交
7f4284de
共有 5 个文件被更改,包括 2139 次插入155 次删除
  1. 35
      docs/Example-Environments.md
  2. 2
      unity-environment/Assets/ML-Agents/Examples/Drone/Scripts/DroneAgent.cs
  3. 255
      unity-environment/Assets/ML-Agents/Examples/Spider/Scripts/SpiderAgentConfigurable.cs
  4. 1001
      images/drone.png
  5. 1001
      images/spider.png

35
docs/Example-Environments.md


* Action space: (Continuous) Size of 4, corresponding to torque applicable to two joints.
* Observations: None
* Reset Parameters: Two, corresponding to goal size, and goal movement speed.
## Drone
![Drone](../images/drone.png)
* Set-up: Quadcopters have 4 engines to propel them.
* Goal: The agents must move towards a target in 3D.
* Agents: The environment contains 10 agents linked to a single brain.
* Agent Reward Function (independent):
* Between 0 and +0.5, increasing the closer the agent is to its target.
* +1 for being inside the target.
* Brains: One brain with the following state/action space.
* State space: (Continuous) 24 variables corresponding to position relative to target, rotation, velocity, angular velocity, acceleration and angular acceleration.
* Action space: (Continuous) Size of 4, corresponding to the vertical force applied to each engine.
* Observations: None
* Reset Parameters: One, corresponding to goal size.
## Spider
![Spider](../images/spider.png)
* Set-up: A sphere with 4 arms and 4 forearms.
* Goal: Each agent must move its body along the x axis.
* Agents: The environment contains 3 agents linked to a single brain.
* Agent Reward Function (independent):
* +1 times velocity in the x direction
* -1 for falling.
* -0.01 times the action squared
* -0.05 times the absolute z position offset of the body from its parent
* -0.05 times the absolute velocity in the y direction
* Brains: One brain with the following state/action space.
* State space: (Continuous) 117 variables corresponding to position, rotation, velocity, and angular velocities of each limb plus the acceleration and angular acceleration of the body.
* Action space: (Continuous) Size of 12, corresponding to torque applicable to 12 joints.
* Observations: None
* Reset Parameters: None

2
unity-environment/Assets/ML-Agents/Examples/Drone/Scripts/DroneAgent.cs


public override void AgentStep(float[] act)
{
Monitor.Log("Action", act, MonitorType.bar, body);
Monitor.Log("Action", act, MonitorType.hist, body);
for(int i = 0; i<4 ; i++)
{

255
unity-environment/Assets/ML-Agents/Examples/Spider/Scripts/SpiderAgentConfigurable.cs


using System.Collections.Generic;
using UnityEngine;
public class SpiderAgentConfigurable: Agent {
public class SpiderAgentConfigurable: Agent
{
public float strength;
public float strength;
float x_position;
float x_position;
[HideInInspector]

public bool fell;
Vector3 past_velocity;
Vector3 past_velocity;
Transform body;
Transform body;
public Transform[] limbs;
public Transform[] limbs;
//
Dictionary<GameObject, Vector3> transformsPosition;
Dictionary<GameObject, Quaternion> transformsRotation;
//
Dictionary<GameObject, Vector3> transformsPosition;
Dictionary<GameObject, Quaternion> transformsRotation;
public override void InitializeAgent ()
{
public override void InitializeAgent()
{
body = transform.Find ("Sphere");
body = transform.Find("Sphere");
transformsPosition = new Dictionary<GameObject, Vector3> ();
transformsRotation = new Dictionary<GameObject, Quaternion> ();
transformsPosition = new Dictionary<GameObject, Vector3>();
transformsRotation = new Dictionary<GameObject, Quaternion>();
foreach (Transform child in allChildren) {
transformsPosition [child.gameObject] = child.position;
transformsRotation [child.gameObject] = child.rotation;
}
foreach (Transform child in allChildren)
{
transformsPosition[child.gameObject] = child.position;
transformsRotation[child.gameObject] = child.rotation;
}
}
}
public override List<float> CollectState()
{
List<float> state = new List<float>();
state.Add (body.transform.rotation.eulerAngles.x);
state.Add (body.transform.rotation.eulerAngles.y);
state.Add (body.transform.rotation.eulerAngles.z);
public override List<float> CollectState()
{
List<float> state = new List<float>();
state.Add(body.transform.rotation.eulerAngles.x);
state.Add(body.transform.rotation.eulerAngles.y);
state.Add(body.transform.rotation.eulerAngles.z);
state.Add (body.gameObject.GetComponent<Rigidbody> ().velocity.x);
state.Add (body.gameObject.GetComponent<Rigidbody> ().velocity.y);
state.Add (body.gameObject.GetComponent<Rigidbody> ().velocity.z);
state.Add(body.gameObject.GetComponent<Rigidbody>().velocity.x);
state.Add(body.gameObject.GetComponent<Rigidbody>().velocity.y);
state.Add(body.gameObject.GetComponent<Rigidbody>().velocity.z);
state.Add ((body.gameObject.GetComponent<Rigidbody> ().velocity.x - past_velocity.x) / Time.fixedDeltaTime);
state.Add ((body.gameObject.GetComponent<Rigidbody> ().velocity.y - past_velocity.y) / Time.fixedDeltaTime);
state.Add ((body.gameObject.GetComponent<Rigidbody> ().velocity.z - past_velocity.z) / Time.fixedDeltaTime);
past_velocity = body.gameObject.GetComponent<Rigidbody> ().velocity;
state.Add((body.gameObject.GetComponent<Rigidbody>().velocity.x - past_velocity.x) / Time.fixedDeltaTime);
state.Add((body.gameObject.GetComponent<Rigidbody>().velocity.y - past_velocity.y) / Time.fixedDeltaTime);
state.Add((body.gameObject.GetComponent<Rigidbody>().velocity.z - past_velocity.z) / Time.fixedDeltaTime);
past_velocity = body.gameObject.GetComponent<Rigidbody>().velocity;
foreach (Transform t in limbs) {
state.Add (t.localPosition.x);
state.Add (t.localPosition.y);
state.Add (t.localPosition.z);
state.Add (t.localRotation.x);
state.Add (t.localRotation.y);
state.Add (t.localRotation.z);
state.Add (t.localRotation.w);
Rigidbody rb = t.gameObject.GetComponent < Rigidbody > ();
state.Add (rb.velocity.x);
state.Add (rb.velocity.y);
state.Add (rb.velocity.z);
state.Add (rb.angularVelocity.x);
state.Add (rb.angularVelocity.y);
state.Add (rb.angularVelocity.z);
}
foreach (Transform t in limbs)
{
state.Add(t.localPosition.x);
state.Add(t.localPosition.y);
state.Add(t.localPosition.z);
state.Add(t.localRotation.x);
state.Add(t.localRotation.y);
state.Add(t.localRotation.z);
state.Add(t.localRotation.w);
Rigidbody rb = t.gameObject.GetComponent < Rigidbody >();
state.Add(rb.velocity.x);
state.Add(rb.velocity.y);
state.Add(rb.velocity.z);
state.Add(rb.angularVelocity.x);
state.Add(rb.angularVelocity.y);
state.Add(rb.angularVelocity.z);
}

leg_touching[index] = false;
}
// Monitor.Log ("State", state, MonitorType.hist, body.gameObject.transform);
return state;
}
public override void AgentStep(float[] act)
{
for (int k = 0; k < act.Length; k++)
{
act[k] = Mathf.Max(Mathf.Min(act[k], 1), -1);
}
return state;
}
limbs[0].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[0].transform.right * strength * act[0]);
limbs[1].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[1].transform.right * strength * act[1]);
limbs[2].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[2].transform.right * strength * act[2]);
limbs[3].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[3].transform.right * strength * act[3]);
public override void AgentStep(float[] act)
{
for (int k = 0; k < act.Length; k++)
{
act[k] = Mathf.Max(Mathf.Min(act[k], 1), -1);
}
limbs[0].gameObject.GetComponent<Rigidbody> ().AddTorque (-body.transform.up * strength * act[4]);
limbs[1].gameObject.GetComponent<Rigidbody> ().AddTorque (-body.transform.up * strength * act[5]);
limbs[2].gameObject.GetComponent<Rigidbody> ().AddTorque (-body.transform.up * strength * act[6]);
limbs[3].gameObject.GetComponent<Rigidbody> ().AddTorque (-body.transform.up * strength * act[7]);
limbs[0].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[0].transform.right * strength * act[0]);
limbs[1].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[1].transform.right * strength * act[1]);
limbs[2].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[2].transform.right * strength * act[2]);
limbs[3].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[3].transform.right * strength * act[3]);
limbs[4].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[4].transform.right * strength * act[8]);
limbs[5].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[5].transform.right * strength * act[9]);
limbs[6].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[6].transform.right * strength * act[10]);
limbs[7].gameObject.GetComponent<Rigidbody> ().AddTorque (-limbs[7].transform.right * strength * act[11]);
// leg0.gameObject.GetComponent<Rigidbody> ().AddTorque (-leg0.transform.right * strength * act[0]);
//// shoulder0.gameObject.GetComponent<Rigidbody> ().AddTorque (leg0.transform.right * strength * act[0]);
// leg1.gameObject.GetComponent<Rigidbody> ().AddTorque (-leg1.transform.right * strength * act[1]);
//// shoulder1.gameObject.GetComponent<Rigidbody> ().AddTorque (leg1.transform.right * strength * act[1]);
// leg2.gameObject.GetComponent<Rigidbody> ().AddTorque (-leg2.transform.right * strength * act[2]);
//// shoulder2.gameObject.GetComponent<Rigidbody> ().AddTorque (leg2.transform.right * strength * act[2]);
// leg3.gameObject.GetComponent<Rigidbody> ().AddTorque (-leg3.transform.right * strength * act[3]);
//// shoulder3.gameObject.GetComponent<Rigidbody> ().AddTorque (leg3.transform.right * strength * act[3]);
limbs[0].gameObject.GetComponent<Rigidbody>().AddTorque(-body.transform.up * strength * act[4]);
limbs[1].gameObject.GetComponent<Rigidbody>().AddTorque(-body.transform.up * strength * act[5]);
limbs[2].gameObject.GetComponent<Rigidbody>().AddTorque(-body.transform.up * strength * act[6]);
limbs[3].gameObject.GetComponent<Rigidbody>().AddTorque(-body.transform.up * strength * act[7]);
// foreleg0.gameObject.GetComponent<Rigidbody> ().AddTorque (-foreleg0.transform.right * strength * act[4]);
//// leg0.gameObject.GetComponent<Rigidbody> ().AddTorque (leg0.transform.right * strength * act[4]);
// foreleg1.gameObject.GetComponent<Rigidbody> ().AddTorque (-foreleg1.transform.right * strength * act[5]);
//// leg1.gameObject.GetComponent<Rigidbody> ().AddTorque (foreleg1.transform.right * strength * act[5]);
// foreleg2.gameObject.GetComponent<Rigidbody> ().AddTorque (-foreleg2.transform.right * strength * act[6]);
//// leg2.gameObject.GetComponent<Rigidbody> ().AddTorque (foreleg2.transform.right * strength * act[6]);
// foreleg3.gameObject.GetComponent<Rigidbody> ().AddTorque (-foreleg3.transform.right * strength * act[7]);
//// leg3.gameObject.GetComponent<Rigidbody> ().AddTorque (foreleg3.transform.right * strength * act[7]);
// shoulder0.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder0.transform.up * strength * act[8]);
// sphere.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder0.transform.up * strength * act[8]);
// shoulder1.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder1.transform.up * strength * act[9]);
// sphere.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder1.transform.up * strength * act[9]);
// shoulder2.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder2.transform.up * strength * act[10]);
// sphere.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder2.transform.up * strength * act[10]);
// shoulder3.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder3.transform.up * strength * act[11]);
// sphere.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder3.transform.up * strength * act[11]);
limbs[4].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[4].transform.right * strength * act[8]);
limbs[5].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[5].transform.right * strength * act[9]);
limbs[6].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[6].transform.right * strength * act[10]);
limbs[7].gameObject.GetComponent<Rigidbody>().AddTorque(-limbs[7].transform.right * strength * act[11]);
// shoulder0.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder0.transform.up * strength * act[8]);
//// body.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder0.transform.up * strength * act[8]);
// shoulder1.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder1.transform.up * strength * act[9]);
//// body.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder1.transform.up * strength * act[9]);
// shoulder2.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder2.transform.up * strength * act[10]);
//// body.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder2.transform.up * strength * act[10]);
// shoulder3.gameObject.GetComponent<Rigidbody> ().AddTorque (-shoulder3.transform.up * strength * act[11]);
//// body.gameObject.GetComponent<Rigidbody> ().AddTorque (shoulder3.transform.up * strength * act[11]);
// Debug.Log(leg0Limits.max);
// Debug.Log(leg0.limits.max);
// Debug.Log(leg0.angle);
// Debug.Log(leg0.useLimits);
// leg0.limits = leg0Limits;
float torque_penalty = act [0] * act[0] + act [1] * act[1] + act [2] * act[2] + act [3] * act[3]
+ act [4] * act[4]+ act [5]* act[5]+ act [6] * act[6]+ act [7] * act[7]
+ act [8] * act[8]+ act [9]* act[9] + act [10] * act[10] + act [11] * act[11];
float torque_penalty = act[0] * act[0] + act[1] * act[1] + act[2] * act[2] + act[3] * act[3]
+ act[4] * act[4] + act[5] * act[5] + act[6] * act[6] + act[7] * act[7]
+ act[8] * act[8] + act[9] * act[9] + act[10] * act[10] + act[11] * act[11];
// reward = sphere.GetComponent<Rigidbody>().velocity.x + (sphere.transform.position.y - 1) * 0.05f - 0f * torque_penalty;
// reward = 0.1f;
reward = (0
- 0.01f * torque_penalty
+ 1.0f * body.GetComponent<Rigidbody> ().velocity.x
// + 0.1f * Vector3.Dot (body.transform.up, new Vector3 (0, 1, 0))
-0.05f * Mathf.Abs(body.transform.position.z - body.transform.parent.transform.position.z)
-0.05f * Mathf.Abs(body.GetComponent<Rigidbody> ().velocity.y)
);
// + Mathf.Min(Mathf.Max(0, sphere.transform.position.y * 0.05f), 1);
reward = (0
- 0.01f * torque_penalty
+ 1.0f * body.GetComponent<Rigidbody>().velocity.x
- 0.05f * Mathf.Abs(body.transform.position.z - body.transform.parent.transform.position.z)
- 0.05f * Mathf.Abs(body.GetComponent<Rigidbody>().velocity.y)
);
}
if (fell)
{

}
Monitor.Log ("Reward", reward, MonitorType.slider, body.gameObject.transform);
// Debug.Log(reward);
Monitor.Log("Reward", reward, MonitorType.slider, body.gameObject.transform);
foreach (Transform child in allChildren) {
// if (child.gameObject.name.Contains("Spider"))
// {
// continue;
// }
// Debug.Log(child.parent.localScale.x);
foreach (Transform child in allChildren)
{
// Vector3 scaleTmp = child.transform.localScale;
// scaleTmp.x /= child.parent.localScale.x;
// scaleTmp.y /= child.parent.localScale.y;
// scaleTmp.z /= child.parent.localScale.z;
// child.transform.parent = child.parent;
// Debug.Log(scaleTmp.x);
// child.transform.localScale = scaleTmp;
}
}
public override void AgentReset()
{
public override void AgentReset()
{
foreach (Transform child in allChildren) {
if ((child.gameObject.name.Contains("Spider"))
|| (child.gameObject.name.Contains("parent")))
// || (child.gameObject.name.Contains("Sphere")))
foreach (Transform child in allChildren)
{
if ((child.gameObject.name.Contains("Spider"))
|| (child.gameObject.name.Contains("parent")))
child.position = transformsPosition [child.gameObject];
child.rotation = transformsRotation [child.gameObject];
child.gameObject.GetComponent<Rigidbody> ().velocity = default(Vector3);
child.gameObject.GetComponent<Rigidbody> ().angularVelocity = default(Vector3);
}
gameObject.transform.rotation = Quaternion.Euler (new Vector3 (0, Random.value * 90 - 45, 0));
}
child.position = transformsPosition[child.gameObject];
child.rotation = transformsRotation[child.gameObject];
child.gameObject.GetComponent<Rigidbody>().velocity = default(Vector3);
child.gameObject.GetComponent<Rigidbody>().angularVelocity = default(Vector3);
}
gameObject.transform.rotation = Quaternion.Euler(new Vector3(0, Random.value * 90 - 45, 0));
}
public override void AgentOnDone()
{
public override void AgentOnDone()
{
}
}

1001
images/drone.png
文件差异内容过多而无法显示
查看文件

1001
images/spider.png
文件差异内容过多而无法显示
查看文件

正在加载...
取消
保存