浏览代码

update scenes and get them training

/PhysXArticulations20201Package
Chris Elion 5 年前
当前提交
20b5a157
共有 10 个文件被更改,包括 189 次插入167 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/Crawler/Prefabs/ArticulatedCrawler.prefab
  2. 16
      Project/Assets/ML-Agents/Examples/Crawler/Scenes/ArticulatedCrawlerDynamicTarget.unity
  3. 84
      Project/Assets/ML-Agents/Examples/Crawler/Scripts/ArticulatedCrawlerAgent.cs
  4. 79
      Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
  5. 51
      Project/Assets/ML-Agents/Examples/Reacher/Prefabs/ArticulatedAgent.prefab
  6. 42
      Project/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacher.unity
  7. 13
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ArticulatedReacherAgent.cs
  8. 15
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  9. 27
      config/ppo/ArticulatedCrawlerDynamic.yaml
  10. 27
      config/ppo/ArticulatedReacher.yaml

2
Project/Assets/ML-Agents/Examples/Crawler/Prefabs/ArticulatedCrawler.prefab


m_Model: {fileID: 0}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: My Behavior
m_BehaviorName: ArticulatedCrawlerDynamic
TeamId: 0
m_UseChildSensors: 1
m_ObservableAttributeHandling: 0

16
Project/Assets/ML-Agents/Examples/Crawler/Scenes/ArticulatedCrawlerDynamicTarget.unity


objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: ef4eb61889a846e35a3fc88b75abcce2, type: 3}
--- !u!4 &973938767 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 52452562844929545, guid: ef4eb61889a846e35a3fc88b75abcce2,
type: 3}
m_PrefabInstance: {fileID: 1642605934}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &1192304951
PrefabInstance:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: ec40c70d5160b47cd8deaab79e24892c, type: 3}
m_Name:
m_EditorClassIdentifier:
target: {fileID: 973938767}
--- !u!1001 &1435218879
PrefabInstance:
m_ObjectHideFlags: 0

serializedVersion: 6
m_Component:
- component: {fileID: 2095421679}
- component: {fileID: 2095421680}
- component: {fileID: 2095421680}
m_Layer: 0
m_Name: Academy
m_TagString: Untagged

m_GameObject: {fileID: 2095421678}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 05e76ca0a155e48caa36ee60e64ee9c9, type: 3}
m_Script: {fileID: 11500000, guid: ec51f47c5ed0478080c449c74fd9c154, type: 3}
gravityMultiplier: 1
fixedDeltaTime: 0.01333
maximumDeltaTime: 0.15
solverIterations: 12
solverVelocityIterations: 12
--- !u!114 &2095421681
MonoBehaviour:
m_ObjectHideFlags: 0

84
Project/Assets/ML-Agents/Examples/Crawler/Scripts/ArticulatedCrawlerAgent.cs


public MeshRenderer foot3;
public Material groundedMaterial;
public Material unGroundedMaterial;
bool m_IsNewDecisionStep;
int m_CurrentDecisionStep;
Quaternion m_LookRotation;
Matrix4x4 m_TargetDirMatrix;

m_JdController = GetComponent<ArticulatedJointDriveController>();
m_CurrentDecisionStep = 1;
m_DirToTarget = target.position - body.position;
m_JdController.Reset();

leg2LowerName = leg2Lower.name;
leg3UpperName = leg3Upper.name;
leg3LowerName = leg3Lower.name;
}
/// <summary>
/// We only need to change the joint settings based on decision freq.
/// NOTE: the body currently contains no executable statements, so calling this
/// method does nothing. The previous step-counter logic is kept below, commented
/// out, and the TODO points at DecisionRequester as its intended replacement.
/// </summary>
public void IncrementDecisionTimer()
{
// TODO DecisionRequester
// Disabled legacy logic: reset the counter and flag a new decision step once the
// configured number of actions between decisions was reached, otherwise just count up.
// if (m_CurrentDecisionStep == agentParameters.numberOfActionsBetweenDecisions
// || agentParameters.numberOfActionsBetweenDecisions == 1)
// {
// m_CurrentDecisionStep = 1;
// m_IsNewDecisionStep = true;
// }
// else
// {
// m_CurrentDecisionStep++;
// m_IsNewDecisionStep = false;
// }
}
/// <summary>

public override void OnActionReceived(float[] vectorAction)
{
    // vectorAction is consumed strictly in order (20 values in total):
    //   [0..7]   two target-rotation values per upper leg (2nd and 3rd arguments; the 1st is fixed at 0)
    //   [8..11]  one target-rotation value per lower leg (1st argument; the others are fixed at 0)
    //   [12..19] one joint-strength value per body part, upper legs first, then lower legs
    var joints = m_JdController.bodyPartsDict;
    var next = 0;

    // Target rotations: upper legs.
    joints[leg0Upper].SetJointTargetRotation(0, vectorAction[next++], vectorAction[next++]);
    joints[leg1Upper].SetJointTargetRotation(0, vectorAction[next++], vectorAction[next++]);
    joints[leg2Upper].SetJointTargetRotation(0, vectorAction[next++], vectorAction[next++]);
    joints[leg3Upper].SetJointTargetRotation(0, vectorAction[next++], vectorAction[next++]);

    // Target rotations: lower legs.
    joints[leg0Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg1Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg2Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg3Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);

    // Joint strengths, in the same upper-then-lower order.
    joints[leg0Upper].SetJointStrength(vectorAction[next++]);
    joints[leg1Upper].SetJointStrength(vectorAction[next++]);
    joints[leg2Upper].SetJointStrength(vectorAction[next++]);
    joints[leg3Upper].SetJointStrength(vectorAction[next++]);
    joints[leg0Lower].SetJointStrength(vectorAction[next++]);
    joints[leg1Lower].SetJointStrength(vectorAction[next++]);
    joints[leg2Lower].SetJointStrength(vectorAction[next++]);
    joints[leg3Lower].SetJointStrength(vectorAction[next++]);
}
void FixedUpdate()
{
if (detectTargets)
{
foreach (var bodyPart in m_JdController.bodyPartsDict.Values)

: unGroundedMaterial;
}
// Joint update logic only needs to happen when a new decision is made
if (m_IsNewDecisionStep)
{
// The dictionary with all the body parts in it are in the jdController
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
// Pick a new target joint rotation
bpDict[leg0Upper].SetJointTargetRotation(0, vectorAction[++i], vectorAction[++i]);
bpDict[leg1Upper].SetJointTargetRotation(0, vectorAction[++i], vectorAction[++i]);
bpDict[leg2Upper].SetJointTargetRotation(0, vectorAction[++i], vectorAction[++i]);
bpDict[leg3Upper].SetJointTargetRotation(0, vectorAction[++i], vectorAction[++i]);
bpDict[leg0Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
// Update joint strength
bpDict[leg0Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg1Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg2Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg3Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg0Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg1Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg2Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg3Lower].SetJointStrength(vectorAction[++i]);
}
// Set reward for this step according to mixture of the following elements.
if (rewardMovingTowardsTarget)
{

{
RewardFunctionTimePenalty();
}
IncrementDecisionTimer();
}
/// <summary>

{
GetRandomTargetPos();
}
m_IsNewDecisionStep = true;
m_CurrentDecisionStep = 1;
}
/// <summary>

79
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


}
/// <summary>
/// We only need to change the joint settings based on decision freq.
/// NOTE: the body currently contains no executable statements, so calling this
/// method does nothing. The previous step-counter logic is kept below, commented
/// out, and the TODO points at a Decision Requester as its intended replacement.
/// </summary>
public void IncrementDecisionTimer()
{
// TODO Decision Requester
// Disabled legacy logic: reset the counter and flag a new decision step once the
// configured number of actions between decisions was reached, otherwise just count up.
// if (m_CurrentDecisionStep == agentParameters.numberOfActionsBetweenDecisions
// || agentParameters.numberOfActionsBetweenDecisions == 1)
// {
// m_CurrentDecisionStep = 1;
// m_IsNewDecisionStep = true;
// }
// else
// {
// m_CurrentDecisionStep++;
// m_IsNewDecisionStep = false;
// }
}
/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(VectorSensor vectorSensor, BodyPart bp)

public override void OnActionReceived(float[] vectorAction)
{
    // vectorAction is consumed strictly in order (20 values in total):
    //   [0..7]   two target-rotation values per upper leg (1st and 2nd arguments; the 3rd is fixed at 0)
    //   [8..11]  one target-rotation value per lower leg (1st argument; the others are fixed at 0)
    //   [12..19] one joint-strength value per body part, upper legs first, then lower legs
    var joints = m_JdController.bodyPartsDict;
    var next = 0;

    // Target rotations: upper legs.
    joints[leg0Upper].SetJointTargetRotation(vectorAction[next++], vectorAction[next++], 0);
    joints[leg1Upper].SetJointTargetRotation(vectorAction[next++], vectorAction[next++], 0);
    joints[leg2Upper].SetJointTargetRotation(vectorAction[next++], vectorAction[next++], 0);
    joints[leg3Upper].SetJointTargetRotation(vectorAction[next++], vectorAction[next++], 0);

    // Target rotations: lower legs.
    joints[leg0Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg1Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg2Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);
    joints[leg3Lower].SetJointTargetRotation(vectorAction[next++], 0, 0);

    // Joint strengths, in the same upper-then-lower order.
    joints[leg0Upper].SetJointStrength(vectorAction[next++]);
    joints[leg1Upper].SetJointStrength(vectorAction[next++]);
    joints[leg2Upper].SetJointStrength(vectorAction[next++]);
    joints[leg3Upper].SetJointStrength(vectorAction[next++]);
    joints[leg0Lower].SetJointStrength(vectorAction[next++]);
    joints[leg1Lower].SetJointStrength(vectorAction[next++]);
    joints[leg2Lower].SetJointStrength(vectorAction[next++]);
    joints[leg3Lower].SetJointStrength(vectorAction[next++]);
}
void FixedUpdate()
{
if (detectTargets)
{
foreach (var bodyPart in m_JdController.bodyPartsDict.Values)

: unGroundedMaterial;
}
// Joint update logic only needs to happen when a new decision is made
if (m_IsNewDecisionStep)
{
// The dictionary with all the body parts in it are in the jdController
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
// Pick a new target joint rotation
bpDict[leg0Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg1Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg2Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg3Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg0Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
// Update joint strength
bpDict[leg0Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg1Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg2Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg3Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg0Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg1Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg2Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg3Lower].SetJointStrength(vectorAction[++i]);
}
// Set reward for this step according to mixture of the following elements.
if (rewardMovingTowardsTarget)
{

{
RewardFunctionTimePenalty();
}
IncrementDecisionTimer();
}
/// <summary>

51
Project/Assets/ML-Agents/Examples/Reacher/Prefabs/ArticulatedAgent.prefab


m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_Component:
- component: {fileID: 4067321601414524}
- component: {fileID: 8825304141050044247}
- component: {fileID: 3631661136292700196}
- component: {fileID: 4734153522148485267}
m_Layer: 0
m_Name: ArticulatedAgent
m_TagString: Untagged

m_Script: {fileID: 11500000, guid: 6143635a8af8679a8a563a8820c9c9ee, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 11400000, guid: aee5a4acc5804447682bf509557afa4f, type: 2}
agentCameras: []
agentRenderTextures: []
resetOnDone: 1
onDemandDecision: 0
numberOfActionsBetweenDecisions: 4
hasUpgradedFromAgentParameters: 1
MaxStep: 4000
reacherRoot: {fileID: 3423762949051111643}
pendulumA: {fileID: 1612288809266921535}
pendulumB: {fileID: 2828438114980972834}

type: 3}
--- !u!114 &3631661136292700196
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1395682910799436}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a5c9d521e5ef4759a8246a07d52221e, type: 3}
m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 4
TakeActionsBetweenDecisions: 1
--- !u!114 &4734153522148485267
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1395682910799436}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 5d1c4e0b1822b495aa52bc52839ecb30, type: 3}
m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 33
NumStackedVectorObservations: 1
VectorActionSize: 04000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_Model: {fileID: 0}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: ArticulatedReacher
TeamId: 0
m_UseChildSensors: 1
m_ObservableAttributeHandling: 0
--- !u!1 &1986879271678326
GameObject:
m_ObjectHideFlags: 0

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

42
Project/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacher.unity


manualTileSize: 0
tileSize: 256
accuratePlacement: 0
maxJobWorkers: 0
preserveTilesOutsideBounds: 0
debug:
m_Flags: 0
m_NavMeshData: {fileID: 0}

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

- component: {fileID: 1574236049}
- component: {fileID: 1574236048}
m_Layer: 0
m_Name: Academy
m_Name: ReacherAcademy
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_GameObject: {fileID: 1574236047}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 4c970415924214d13949fbd6cddd1759, type: 3}
m_Script: {fileID: 11500000, guid: ec51f47c5ed0478080c449c74fd9c154, type: 3}
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: aee5a4acc5804447682bf509557afa4f, type: 2}
m_BrainsToControl:
- {fileID: 11400000, guid: aee5a4acc5804447682bf509557afa4f, type: 2}
m_TrainingConfiguration:
width: 80
height: 80
qualityLevel: 1
timeScale: 100
targetFrameRate: 60
m_InferenceConfiguration:
width: 1280
height: 720
qualityLevel: 5
timeScale: 1
targetFrameRate: 60
resetParameters:
m_ResetParameters:
- key: goal_size
value: 5
- key: goal_speed
value: 1
- key: gravity
value: 9.81
- key: deviation
value: 0
- key: deviation_freq
value: 0
gravityMultiplier: 1
fixedDeltaTime: 0.02
maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
--- !u!4 &1574236049
Transform:
m_ObjectHideFlags: 0

13
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ArticulatedReacherAgent.cs


public GameObject goal;
public GameObject reacherRootPrefab;
//private ReacherAcademy m_MyAcademy;
EnvironmentParameters m_ResetParams;
float m_GoalDegree;
private string m_PendulumAName;

{
m_RbA = pendulumA.GetComponent<ArticulationBody>();
m_RbB = pendulumB.GetComponent<ArticulationBody>();
//m_MyAcademy = GameObject.Find("Academy").GetComponent<ReacherAcademy>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
m_PendulumAName = pendulumA.name;

public void SetResetParameters()
{
    // Read the goal settings from the environment parameters held in m_ResetParams.
    // The second argument of GetWithDefault is the fallback value for each key.
    m_GoalSize = m_ResetParams.GetWithDefault("goal_size", 5);

    // The goal speed is the configured speed scaled by a random factor in [-1, 1].
    var speedFactor = Random.Range(-1f, 1f);
    var configuredGoalSpeed = m_ResetParams.GetWithDefault("goal_speed", 1);
    m_GoalSpeed = speedFactor * configuredGoalSpeed;

    m_Deviation = m_ResetParams.GetWithDefault("deviation", 0);
    m_DeviationFreq = m_ResetParams.GetWithDefault("deviation_freq", 0);
}
}

15
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


public GameObject pendulumB;
public GameObject hand;
public GameObject goal;
//private ReacherAcademy m_MyAcademy;
float m_GoalDegree;
private Rigidbody m_RbA;
private Rigidbody m_RbB;

// Frequency of the cosine deviation of the goal along the vertical dimension
private float m_DeviationFreq;
EnvironmentParameters m_ResetParams;
/// <summary>
/// Collect the rigidbodies of the reacher in order to reuse them for
/// observations and actions.

m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();
//m_MyAcademy = GameObject.Find("Academy").GetComponent<ReacherAcademy>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetResetParameters()
{
    // Read the goal settings from the environment parameters held in m_ResetParams.
    // The second argument of GetWithDefault is the fallback value for each key.
    m_GoalSize = m_ResetParams.GetWithDefault("goal_size", 5);

    // The goal speed is the configured speed scaled by a random factor in [-1, 1].
    var speedFactor = Random.Range(-1f, 1f);
    var configuredGoalSpeed = m_ResetParams.GetWithDefault("goal_speed", 1);
    m_GoalSpeed = speedFactor * configuredGoalSpeed;

    m_Deviation = m_ResetParams.GetWithDefault("deviation", 0);
    m_DeviationFreq = m_ResetParams.GetWithDefault("deviation_freq", 0);
}
public override void Heuristic(float[] actionsOut)

27
config/ppo/ArticulatedCrawlerDynamic.yaml


# PPO trainer configuration for the ArticulatedCrawlerDynamic behavior.
# The key under `behaviors` must match the agent's Behavior Name.
behaviors:
  ArticulatedCrawlerDynamic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    output_path: default
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

27
config/ppo/ArticulatedReacher.yaml


# PPO trainer configuration for the ArticulatedReacher behavior.
# The key under `behaviors` must match the agent's Behavior Name.
behaviors:
  ArticulatedReacher:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    output_path: default
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true
正在加载...
取消
保存