
fixed episode length modification issue.

/develop/parameterizedenvs
Scott, 3 years ago
Current commit: 130512b4
7 files changed, 85 insertions and 36 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallAgentArea.prefab (33 changes)
  2. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallMulti.prefab (3 changes)
  3. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (5 changes)
  4. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (3 changes)
  5. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (16 changes)
  6. 3DBallMultiPower.yaml (30 changes)
  7. config/ppo/3DBallMulti_power.yaml (31 changes)

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallAgentArea.prefab (33 changes)


m_Component:
- component: {fileID: 8706416217891658080}
- component: {fileID: 3027918195473112231}
- component: {fileID: 2318041762275472194}
m_Layer: 0
m_Name: 3DBallAgentArea
m_TagString: Untagged

actorObjs: []
prefab: {fileID: 1321468028730240, guid: c5e235d7c7cba4e5393f3e6b4c6bfe44, type: 3}
numberOfParallel: 18
-maxStep: 100
+maxStep: 10
--- !u!114 &2318041762275472194
MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 8706416217891658087}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: 5d1c4e0b1822b495aa52bc52839ecb30, type: 3}
  m_Name:
  m_EditorClassIdentifier:
  m_BrainParameters:
    VectorObservationSize: 1
    NumStackedVectorObservations: 1
    m_ActionSpec:
      m_NumContinuousActions: 0
      BranchSizes: 01000000
    VectorActionSize: 01000000
    VectorActionDescriptions: []
    VectorActionSpaceType: 0
    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 0}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  m_BehaviorName: My Behavior
  TeamId: 0
  m_UseChildSensors: 1
  m_UseChildActuators: 1
  m_ObservableAttributeHandling: 0

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallMulti.prefab (3 changes)


m_RewardType: 2
goal: {fileID: 5497951568357209023}
epsilon: 0.25
stepvalue: 40
--- !u!114 &1306725529891448089
MonoBehaviour:
m_ObjectHideFlags: 0

m_GameObject: {fileID: 5497951568357209023}
m_Material: {fileID: 0}
m_IsTrigger: 0
-m_Enabled: 1
+m_Enabled: 0
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}

Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (5 changes)


m_Modification:
  m_TransformParent: {fileID: 0}
  m_Modifications:
  - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
      type: 3}
    propertyPath: maxStep
    value: 20
    objectReference: {fileID: 0}
  - target: {fileID: 8706416217891658080, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
      type: 3}
    propertyPath: m_RootOrder

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (3 changes)


{
Ball3DMultiAgent agent = actor.GetComponent<Ball3DMultiAgent>();
agent.m_RewardType = rewardType;
-agent.MaxStep = maxStep;
+agent.setMaxStep(maxStep);
}
}
public void AreaReset()

if (changed)
{
AreaReset();
update_agents();
}
}
}
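
For reference, the "changed" flag above implies the area polls the trainer-side environment parameters and pushes updates into its agents. A minimal sketch of that polling loop follows; Academy.Instance.EnvironmentParameters.GetWithDefault is the actual ML-Agents API for reading the environment_parameters values from the YAML configs further down, but the class and field names here are assumptions for illustration, not the repo's code.

using Unity.MLAgents;
using UnityEngine;

// Hypothetical poller sketching how Ball3DAgentArea could detect a changed
// maxStep parameter and forward it to its agents.
public class AreaParameterPoller : MonoBehaviour
{
    int m_LastMaxStep = -1; // last value we propagated (assumed field)

    void FixedUpdate()
    {
        // GetWithDefault reads a value sent under environment_parameters in
        // the trainer YAML; the default applies when no trainer is attached.
        var maxStep = (int)Academy.Instance.EnvironmentParameters
            .GetWithDefault("maxStep", 5000f);

        bool changed = maxStep != m_LastMaxStep;
        if (changed)
        {
            m_LastMaxStep = maxStep;
            // Mirror update_agents() above: push the new value into every agent.
            foreach (var agent in GetComponentsInChildren<Ball3DMultiAgent>())
            {
                agent.setMaxStep(maxStep);
            }
        }
    }
}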

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (16 changes)


public GameObject goal;
[Tooltip("Specifies the radius of the goal region")]
public float epsilon=0.25f;
public int stepvalue=5000;
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();

sensor.AddObservation(m_BallRb.velocity);
}
}
// public void FixedUpdate()
// {
// MaxStep = stepvalue;
// }
public override void OnActionReceived(ActionBuffers actionBuffers)
{

+ gameObject.transform.position;
//Reset the parameters when the Agent is reset.
SetResetParameters();
MaxStep = stepvalue;
}
public override void Heuristic(in ActionBuffers actionsOut)

float maxdist = 3.54f; // upper bound, assuming coordinates lie within [-2.5, 2.5] in each dimension (sqrt(2.5^2 + 2.5^2) ≈ 3.54)
float dist = Vector3.Distance(ball.transform.position, goal.transform.position);
// distance between the ball and the center of the goal region
-dist = Mathf.Clamp(epsilon - dist, 0, maxdist);
+dist = Mathf.Clamp(dist, 0, maxdist);
}
public void setMaxStep(int value)
{
stepvalue = value;
MaxStep = value;
}
}
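
Taken together, the change routes every episode-length update through setMaxStep and re-applies the cached stepvalue when an episode restarts, instead of mutating MaxStep from the now commented-out FixedUpdate. A condensed sketch of the resulting pattern, assuming OnEpisodeBegin is the reset hook that wraps the SetResetParameters call above:

using Unity.MLAgents;

// Condensed sketch of the episode-length handling after this commit;
// everything unrelated to MaxStep is elided.
public class EpisodeLengthSketch : Agent
{
    public int stepvalue = 5000; // cached episode length, survives resets

    public void setMaxStep(int value)
    {
        stepvalue = value; // remember for future episodes
        MaxStep = value;   // and take effect immediately
    }

    public override void OnEpisodeBegin()
    {
        // Re-applying the cached value keeps a reset from reverting MaxStep
        // to the serialized prefab/scene default.
        MaxStep = stepvalue;
    }
}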

3DBallMultiPower.yaml (30 changes)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000

# Add this section
environment_parameters:
  maxStep: 1000

config/ppo/3DBallMulti_power.yaml (31 changes)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.999
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 5000
    summary_freq: 18000
environment_parameters:
  maxStep: 20
  rewardType: 2
  numParallel: 8
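
These keys must match the strings the C# side looks up at runtime. Below are illustrative lookups for the two keys not covered earlier; the key strings come from this YAML, while the class name and the defaults are assumptions (the 18 mirrors numberOfParallel in the prefab above).

using Unity.MLAgents;

// Hypothetical helper reading the remaining environment_parameters keys.
public static class PowerEnvParams
{
    // rewardType selects among the reward variants (cf. m_RewardType above).
    public static int RewardType =>
        (int)Academy.Instance.EnvironmentParameters.GetWithDefault("rewardType", 0f);

    // numParallel controls how many agent areas are spawned.
    public static int NumParallel =>
        (int)Academy.Instance.EnvironmentParameters.GetWithDefault("numParallel", 18f);
}

Training would then be launched in the usual way, e.g. mlagents-learn config/ppo/3DBallMulti_power.yaml --run-id=<run-id>.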