
Agent.Heuristic takes a float[] (#3765)

/develop/add-fire
GitHub · 5 years ago
Current commit dd6aa7e2
21 files changed, 93 insertions and 106 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (9 changes)
  2. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (11 changes)
  3. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (14 changes)
  4. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)
  5. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (18 changes)
  6. Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (18 changes)
  7. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (18 changes)
  8. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (16 changes)
  9. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (11 changes)
  10. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (14 changes)
  11. com.unity.ml-agents/CHANGELOG.md (1 change)
  12. com.unity.ml-agents/Runtime/Agent.cs (6 changes)
  13. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (8 changes)
  14. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (11 changes)
  15. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (4 changes)
  16. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (3 changes)
  17. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (8 changes)
  18. docs/Getting-Started.md (2 changes)
  19. docs/Learning-Environment-Create-New.md (8 changes)
  20. docs/Learning-Environment-Design-Agents.md (3 changes)
  21. docs/Migrating.md (4 changes)

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (9 changes)

         SetResetParameters();
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[2];
-        action[0] = -Input.GetAxis("Horizontal");
-        action[1] = Input.GetAxis("Vertical");
-        return action;
+        actionsOut[0] = -Input.GetAxis("Horizontal");
+        actionsOut[1] = Input.GetAxis("Vertical");
     }

     public void SetBall()

Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (11 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[3];
-        action[0] = Input.GetAxis("Horizontal");
-        action[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        action[2] = Input.GetAxis("Vertical");
-        return action;
+        actionsOut[0] = Input.GetAxis("Horizontal");
+        actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
+        actionsOut[2] = Input.GetAxis("Vertical");
     }

     void Update()

Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (14 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[4];
-            action[2] = 2f;
+            actionsOut[2] = 2f;
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[2] = 1f;
+            actionsOut[2] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-        action[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        return action;
+        actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = k_NoAction;
-            return new float[] { k_Right };
+            actionsOut[0] = k_Right;
-            return new float[] { k_Up };
+            actionsOut[0] = k_Up;
-            return new float[] { k_Left };
+            actionsOut[0] = k_Left;
-            return new float[] { k_Down };
+            actionsOut[0] = k_Down;
-        return new float[] { k_NoAction };
     }

     // to be implemented by the developer

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (18 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (18 changes)

         AddReward(-1f / maxStep);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     /// <summary>

Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (18 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (16 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[3];
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-            action[2] = 1f;
+            actionsOut[2] = 1f;
-            action[2] = 2f;
+            actionsOut[2] = 2f;
-            action[1] = 1f;
+            actionsOut[1] = 1f;
-            action[1] = 2f;
+            actionsOut[1] = 2f;
-        return action;
     }

     /// <summary>
     /// Used to provide a "kick" to the ball.

Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (11 changes)

         m_TextComponent.text = score.ToString();
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[3];
-        action[0] = Input.GetAxis("Horizontal"); // Racket Movement
-        action[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
-        action[2] = Input.GetAxis("Vertical"); // Racket Rotation
-        return action;
+        actionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
+        actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
+        actionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (14 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[4];
-            action[1] = 2f;
+            actionsOut[1] = 2f;
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[1] = 1f;
+            actionsOut[1] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-        action[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        return action;
+        actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
     }

     // Detect when the agent hits the goal

com.unity.ml-agents/CHANGELOG.md (1 change)

 - Added ability to start training (initialize model weights) from a previous run ID. (#3710)
 - The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
 - The offset logic was removed from DecisionRequester.
+- The signature of `Agent.Heuristic()` was changed to take a `float[]` as a parameter, instead of returning the array. This was done to prevent a common source of error where users would return arrays of the wrong size.
 - The communication API version has been bumped up to 1.0.0 and will use [Semantic Versioning](https://semver.org/) to do compatibility checks for communication between Unity and the Python process.

 ### Minor Changes

com.unity.ml-agents/Runtime/Agent.cs (6 changes)

     /// </summary>
-    /// <returns> A float array corresponding to the next action of the Agent
-    /// </returns>
-    public virtual float[] Heuristic()
+    public virtual void Heuristic(float[] actionsOut)
     {
-        var param = m_PolicyFactory.brainParameters;
-        return new float[param.numActions];
+        Array.Clear(actionsOut, 0, actionsOut.Length);
     }

     /// <summary>

com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (8 changes)

         get { return m_BehaviorName + "?team=" + TeamId; }
     }

-    internal IPolicy GeneratePolicy(Func<float[]> heuristic)
+    internal IPolicy GeneratePolicy(HeuristicPolicy.ActionGenerator heuristic)
-            return new HeuristicPolicy(heuristic);
+            return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
         case BehaviorType.InferenceOnly:
         {
             if (m_Model == null)

             }
             else
             {
-                return new HeuristicPolicy(heuristic);
+                return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
-            return new HeuristicPolicy(heuristic);
+            return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
         }
     }

com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (11 changes)

     /// </summary>
     internal class HeuristicPolicy : IPolicy
     {
-        Func<float[]> m_Heuristic;
+        public delegate void ActionGenerator(float[] actionsOut);
+        ActionGenerator m_Heuristic;
+        int m_numActions;
         WriteAdapter m_WriteAdapter = new WriteAdapter();
         NullList m_NullList = new NullList();

-        public HeuristicPolicy(Func<float[]> heuristic)
+        public HeuristicPolicy(ActionGenerator heuristic, int numActions)
+            m_numActions = numActions;
         }

         /// <inheritdoc />

             if (!info.done)
             {
-                m_LastDecision = m_Heuristic.Invoke();
+                // Reset m_LastDecision each time.
+                m_LastDecision = new float[m_numActions];
+                m_Heuristic.Invoke(m_LastDecision);
             }
         }
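For readers skimming the diff, here is a minimal, self-contained sketch of the fill-a-buffer delegate pattern this file introduces. It is an illustration only, not package source; the `FillActions` method, its values, and the action count of 2 are hypothetical:

```csharp
using System;

static class ActionGeneratorSketch
{
    // Mirrors HeuristicPolicy.ActionGenerator: the caller passes in the
    // buffer, and the heuristic writes into it instead of returning a
    // freshly allocated array.
    delegate void ActionGenerator(float[] actionsOut);

    // Hypothetical heuristic; indices and values are illustrative only.
    static void FillActions(float[] actionsOut)
    {
        actionsOut[0] = 1f;
        actionsOut[1] = -0.5f;
    }

    static void Main()
    {
        ActionGenerator heuristic = FillActions;
        // As in the patched policy: allocate numActions floats, then invoke.
        var lastDecision = new float[2];
        heuristic.Invoke(lastDecision);
        Console.WriteLine(string.Join(", ", lastDecision)); // prints: 1, -0.5
    }
}
```

Because the policy now owns the buffer and its size, a heuristic can no longer hand back an array whose length disagrees with the configured action count.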

com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (4 changes)

 [TestFixture]
 public class BehaviorParameterTests
 {
-    static float[] DummyHeuristic()
+    static void DummyHeuristic(float[] actionsOut)
     {
-        return null;
+        // No-op
     }

     [Test]

com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (3 changes)

         agentActionCallsForEpisode = 0;
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        return new float[0];
     }
 }

com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (8 changes)

 {
     public int numHeuristicCalls;

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        return base.Heuristic();
+        base.Heuristic(actionsOut);
     }
 }

 // Simple SensorComponent that sets up a StackingSensor
 public class StackingComponent : SensorComponent
 {
     public SensorComponent wrappedComponent;

docs/Getting-Started.md (2 changes)

   step.
 * `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in the Behavior
   Parameters of the Agent, the Agent will use the `Heuristic()` method to generate
-  the actions of the Agent. As such, the `Heuristic()` method returns an array of
+  the actions of the Agent. As such, the `Heuristic()` method takes an array of
   floats. In the case of the Ball 3D Agent, the `Heuristic()` method converts the
   keyboard inputs into actions.

docs/Learning-Environment-Create-New.md (8 changes)

 The `Heuristic()` method will look like this:

 ```csharp
-public override float[] Heuristic()
+public override void Heuristic(float[] actionsOut)
 {
-    var action = new float[2];
-    action[0] = Input.GetAxis("Horizontal");
-    action[1] = Input.GetAxis("Vertical");
-    return action;
+    actionsOut[0] = Input.GetAxis("Horizontal");
+    actionsOut[1] = Input.GetAxis("Vertical");
 }
 ```

docs/Learning-Environment-Design-Agents.md (3 changes)

 Note that when you are programming actions for an agent, it is often helpful to
 test your action logic using the `Heuristic()` method of the Agent,
-which lets you map keyboard
-commands to actions.
+which lets you map keyboard commands to actions.

 The [3DBall](Learning-Environment-Examples.md#3dball-3d-balance-ball) and
 [Area](Learning-Environment-Examples.md#push-block) example environments are set

docs/Migrating.md (4 changes)

 * The `play_against_current_self_ratio` self-play trainer hyperparameter has been renamed to `play_against_latest_model_ratio`
 * Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
 * The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
+* The signature of `Agent.Heuristic()` was changed to take a `float[]` as a parameter, instead of returning the array. This was done to prevent a common source of error where users would return arrays of the wrong size.

 ### Steps to Migrate
 * Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training

 * `Academy.FloatProperties` was removed.
 * `Academy.RegisterSideChannel` and `Academy.UnregisterSideChannel` were removed.

 ### Steps to Migrate
+* If your Agent class overrides `Heuristic()`, change the signature to `public override void Heuristic(float[] actionsOut)` and assign values to `actionsOut` instead of returning an array.

 ## Migrating from 0.14 to 0.15
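To make that migration step concrete, here is a minimal before/after sketch for a hypothetical agent with two continuous actions; the axis mapping mirrors the 3DBall example earlier in this diff:

```csharp
// Before: build and return a new array. Returning an array of the
// wrong size was the common bug this change is meant to prevent.
public override float[] Heuristic()
{
    var action = new float[2];
    action[0] = Input.GetAxis("Horizontal");
    action[1] = Input.GetAxis("Vertical");
    return action;
}

// After: write into the buffer the policy supplies. Its length is
// already sized to the action count in the Behavior Parameters.
public override void Heuristic(float[] actionsOut)
{
    actionsOut[0] = Input.GetAxis("Horizontal");
    actionsOut[1] = Input.GetAxis("Vertical");
}
```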
