
Added decision frequency and evaluation metric

/develop/parameterizedenvs
Scott, 4 years ago
Current commit: 97990611
7 files changed, 154 insertions(+), 33 deletions(-)
  1. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (17)
  2. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (26)
  3. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (54)
  4. Project/Assets/ML-Agents/Examples/SharedAssets/Materials/Checkers_Gray.mat (13)
  5. config/ppo/3DBallMulti_power.yaml (13)
  6. config/ppo/3DBallMulti_distance.yaml (32)
  7. config/ppo/3DBallMulti_time.yaml (32)

Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (17)


    - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
        type: 3}
      propertyPath: maxStep
-     value: 20
+     value: 200
      objectReference: {fileID: 0}
    - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
        type: 3}
      propertyPath: rewardType
      value: 1
      objectReference: {fileID: 0}
    - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
        type: 3}
      propertyPath: numberOfParallel
      value: 12
      objectReference: {fileID: 0}
+   - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
+       type: 3}
+     propertyPath: decisionFrequency
+     value: 5
+     objectReference: {fileID: 0}
    - target: {fileID: 8706416217891658080, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
        type: 3}
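For context, these propertyPath overrides target the serialized fields exposed by the area script. A minimal sketch of those fields as implied by the overrides above and the script diff below (the MonoBehaviour wrapper, defaults, and enum member set are assumptions, not the verbatim source):

    using UnityEngine;

    // Hypothetical condensation of the fields the scene overrides above target.
    public enum Ball3DRewardType { Time, Distance }   // member set assumed

    public class Ball3DAgentArea : MonoBehaviour
    {
        public int maxStep = 100;               // overridden to 200 in 3DBallMulti.unity
        public Ball3DRewardType rewardType;     // overridden to 1 (distance reward, per the configs)
        public int numberOfParallel;            // overridden to 12
        public int decisionFrequency = 5;       // new in this commit; overridden to 5
    }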

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (26)


[Tooltip("Number of maximum steps the agent can take in the environment. ")]
public int maxStep = 100;
[Tooltip("Specifies which reward function to use. For all environments")]
- public Ball3DRewardType rewardType;
+ public Ball3DRewardType rewardType = Ball3DRewardType.Time;
+ public int decisionFrequency = 5;
public void Awake()
{
Academy.Instance.OnEnvironmentReset += UpdateEnvs;
}
update_agents();
}

{
foreach (var actor in actorObjs)
{
- Ball3DMultiAgent agent = actor.GetComponent<Ball3DMultiAgent>();
+ Ball3DMultiAgent agent = actor.GetComponentInChildren<Ball3DMultiAgent>();
- agent.setMaxStep(maxStep);
+ agent.setMaxStep(maxStep * decisionFrequency);
+ DecisionRequester dr = agent.GetComponent<DecisionRequester>();
+ dr.DecisionPeriod = decisionFrequency;
}
}
public void AreaReset()

}
}
}
- public void FixedUpdate()
+ public void UpdateEnvs()
+ int df = (int)m_ResetParams.GetWithDefault("decisionFreq", decisionFrequency);
Ball3DRewardType rt = rewardType;
bool changed = false;
if (N != numberOfParallel)

changed = true;
maxStep = newStep;
}
+ if (df != decisionFrequency)
+ {
+ changed = true;
+ decisionFrequency = df;
+ }
if (rtype == 0)
{
rt = Ball3DRewardType.Time;

if (changed)
{
AreaReset();
update_agents();
update_agents();
}
}
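The hunks above are fragmentary, so here is a minimal sketch of how the per-agent update appears to fit together (method and field names are taken from the fragments; the assembled body is an assumption, not the verbatim source; Ball3DMultiAgent and DecisionRequester come from the example scripts and Unity.MLAgents):

    // Inside Ball3DAgentArea (sketch). Each parallel agent's MaxStep is scaled by the
    // decision frequency so an episode still contains `maxStep` decisions, and its
    // DecisionRequester asks for a decision every `decisionFrequency` Academy steps.
    void update_agents()
    {
        foreach (var actor in actorObjs)
        {
            Ball3DMultiAgent agent = actor.GetComponentInChildren<Ball3DMultiAgent>();
            agent.setMaxStep(maxStep * decisionFrequency);
            DecisionRequester dr = agent.GetComponent<DecisionRequester>();
            dr.DecisionPeriod = decisionFrequency;
        }
    }

UpdateEnvs, registered on Academy.Instance.OnEnvironmentReset in Awake, re-reads the environment parameters at each reset and, if anything changed, calls AreaReset() followed by update_agents().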

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (54)


Rigidbody m_BallRb;
EnvironmentParameters m_ResetParams;
[Tooltip("Specifies which reward function to use. ")]
- public Ball3DRewardType m_RewardType;
+ public Ball3DRewardType m_RewardType = Ball3DRewardType.Time;
+ StatsRecorder statsRecorder;
+ int stepsInGoal = -1;
+ int timestep = 0;
+ float maxdist = 3.54f; // assumes positions range from -2.5 to 2.5 in each dimension; this is an upper bound.
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();

}
}
// public void FixedUpdate()
// {
// MaxStep = stepvalue;
// }
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var actionZ = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);

{
gameObject.transform.Rotate(new Vector3(1, 0, 0), actionX);
}
bool fell = ((ball.transform.position.y - gameObject.transform.position.y) < -2f ||
Mathf.Abs(ball.transform.position.x - gameObject.transform.position.x) > 3f ||
Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f);
float reward = 0.0f;
if (m_RewardType == Ball3DRewardType.Time)
{

{
- reward = DistanceReward(ball.transform.position, goal.transform.position);
+ reward = DistanceReward(ball.transform.position, goal.transform.position, fell);
SetReward(reward);
if ((ball.transform.position.y - gameObject.transform.position.y) < -2f ||
Mathf.Abs(ball.transform.position.x - gameObject.transform.position.x) > 3f ||
Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
AddReward(reward);
+ float dist = Vector3.Distance(ball.transform.position, goal.transform.position);
+ if (dist <= epsilon)
+ {
+ stepsInGoal++;
+ }
if (fell)
+ void FixedUpdate()
+ {
+ timestep++;
+ }
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);

+ gameObject.transform.position;
//Reset the parameters when the Agent is reset.
SetResetParameters();
+ if (stepsInGoal >= 0)
+ {
+ var statsRecorder = Academy.Instance.StatsRecorder;
+ statsRecorder.Add("Environment/EvalMetric", (float)stepsInGoal / (float)MaxStep);
+ }
+ stepsInGoal = 0;
+ timestep = 0;
}
public override void Heuristic(in ActionBuffers actionsOut)

return 0.0f;
}
- float DistanceReward(Vector3 ball, Vector3 goal)
+ float DistanceReward(Vector3 ball, Vector3 goal, bool fell)
- return -dist;
+ float reward = -dist;
+ if (fell)
+ {
+ reward += -maxdist*(MaxStep - timestep);
+ }
+ return reward;
float maxdist = 3.54f; // assumes positions range from -2.5 to 2.5 in each dimension; this is an upper bound.
float dist = Vector3.Distance(ball, goal);
// distance between the ball and the goal
dist = Mathf.Clamp(dist, 0, maxdist);
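Assembling the fragments, the new distance reward appears to work as follows (a sketch under those assumptions, not the verbatim method; maxdist, MaxStep, and timestep are the fields shown above):

    // Sketch of the reward changed in this commit: negative clamped distance to the
    // goal each step, plus a worst-case penalty for every remaining step if the ball
    // falls, so dropping the ball can never beat balancing it poorly.
    float DistanceReward(Vector3 ball, Vector3 goal, bool fell)
    {
        float dist = Mathf.Clamp(Vector3.Distance(ball, goal), 0f, maxdist);
        float reward = -dist;
        if (fell)
        {
            reward += -maxdist * (MaxStep - timestep);
        }
        return reward;
    }

The evaluation metric is independent of the chosen reward: stepsInGoal counts the steps on which the ball was within epsilon of the goal, and at the start of the next episode the fraction stepsInGoal / MaxStep is written to the StatsRecorder as "Environment/EvalMetric".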

Project/Assets/ML-Agents/Examples/SharedAssets/Materials/Checkers_Gray.mat (13)


Material:
serializedVersion: 6
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_Name: Checkers_Gray
m_Shader: {fileID: 47, guid: 0000000000000000f000000000000000, type: 0}
m_ShaderKeywords: _GLOSSYREFLECTIONS_OFF _METALLICGLOSSMAP _NORMALMAP _SPECULARHIGHLIGHTS_OFF

m_Offset: {x: 0, y: 0}
- _EmissionMap:
m_Texture: {fileID: 0}
- m_Scale: {x: 1, y: 1}
+ m_Scale: {x: 3, y: 3}
- m_Scale: {x: 1, y: 1}
+ m_Scale: {x: 3, y: 3}
m_Offset: {x: 0, y: 0}
- _MetallicGlossMap:
m_Texture: {fileID: 2800000, guid: 0dbde4b748147ad46bb2c40602273db7, type: 3}

- _GlossyReflections: 0
- _Metallic: 0
- _Mode: 0
-  - _OcclusionStrength: 1
+  - _OcclusionStrength: 0.359
- _Parallax: 0.02
- _SmoothnessTextureChannel: 0
- _SpecularHighlights: 0

m_Colors:
-  - _Color: {r: 1, g: 1, b: 1, a: 1}
+  - _Color: {r: 0.011347434, g: 0.8018868, b: 0.05088379, a: 1}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}

config/ppo/3DBallMulti_power.yaml (13)


    trainer_type: ppo
    hyperparameters:
      batch_size: 64
-     buffer_size: 12000
+     buffer_size: 5000
-     lambd: 0.99
+     lambd: 0.9
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:

        gamma: 0.999
        strength: 1.0
    keep_checkpoints: 5
-   max_steps: 500000
+   max_steps: 5000000
-   summary_freq: 18000
+   summary_freq: 10000
-   maxStep: 20
+   maxStep: 1000
-   numParallel: 8
+   numParallel: 18
+   decisionFreq: 5
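The environment_parameters keys in these configs (maxStep, rewardType, numParallel, decisionFreq) are the same keys the area script reads back on environment reset. A minimal sketch of that lookup in C#, assuming the defaults shown (only the "decisionFreq" read appears verbatim in the diff):

    // Sketch: reading the trainer-side environment_parameters inside UpdateEnvs.
    var resetParams = Academy.Instance.EnvironmentParameters;
    int newStep = (int)resetParams.GetWithDefault("maxStep", maxStep);
    int N       = (int)resetParams.GetWithDefault("numParallel", numberOfParallel);
    int df      = (int)resetParams.GetWithDefault("decisionFreq", decisionFrequency);
    int rtype   = (int)resetParams.GetWithDefault("rewardType", (float)rewardType);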

config/ppo/3DBallMulti_distance.yaml (32)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 5000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.9
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.999
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 5000
    summary_freq: 10000
environment_parameters:
  maxStep: 1000
  rewardType: 1
  numParallel: 18
  decisionFreq: 5

config/ppo/3DBallMulti_time.yaml (32)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 5000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.9
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.999
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 5000
    summary_freq: 10000
environment_parameters:
  maxStep: 1000
  rewardType: 0
  numParallel: 18
  decisionFreq: 5
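The distance and time configs are identical apart from rewardType: per these two files and the parsing in Ball3DAgentArea.cs, 0 selects the time reward and 1 the distance reward. A minimal sketch of that decoding (only the Time branch appears verbatim in the diff; the Distance branch and member name are assumptions):

    // Sketch: mapping the numeric rewardType environment parameter onto the enum.
    Ball3DRewardType rt = rewardType;
    if (rtype == 0)
    {
        rt = Ball3DRewardType.Time;
    }
    else if (rtype == 1)
    {
        rt = Ball3DRewardType.Distance;   // assumed member name
    }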