浏览代码

Further modifications to make PPO work

/PhysXArticulations20201
Vilmantas Balasevicius 5 年前
当前提交
2d032594
共有 7 个文件被更改,包括 387 次插入944 次删除
  1. 164
      UnitySDK/Assets/ML-Agents/Examples/Reacher/Prefabs/ArticulatedAgent.prefab
  2. 70
      UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacher.unity
  3. 986
      UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacherManualControl.unity
  4. 43
      UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ArticulatedReacherAgent.cs
  5. 62
      UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ManualControlReacherArm.cs
  6. 4
      config/sac_trainer_config.yaml
  7. 2
      config/trainer_config.yaml

164
UnitySDK/Assets/ML-Agents/Examples/Reacher/Prefabs/ArticulatedAgent.prefab


m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 3, z: 1}
m_Children: []
m_Children:
- {fileID: 4910003592613346}
m_RootOrder: 1
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &33129520809121966
MeshFilter:

- component: {fileID: 33524134957751370}
- component: {fileID: 135370188030477524}
- component: {fileID: 23420739137250176}
- component: {fileID: 7502585849089161407}
m_Layer: 0
m_Name: Sphere
m_TagString: Untagged

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!171741748 &7502585849089161407
ArticulationBody:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1157728520783578}
m_Enabled: 1
m_Mass: 1
m_ParentAnchorPosition: {x: 0, y: 0, z: 0}
m_ParentAnchorRotation: {x: 0, y: 0, z: 0.70710677, w: 0.70710677}
m_AnchorPosition: {x: 0, y: 0, z: 0}
m_AnchorRotation: {x: 0, y: 0, z: 0.70710677, w: 0.70710677}
m_ComputeParentAnchor: 1
m_ArticulationJointType: 0
m_LinearX: 0
m_LinearY: 0
m_LinearZ: 0
m_Swing1: 2
m_Swing2: 2
m_Twist: 2
m_XDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_YDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_ZDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_LinearDamping: 0.05
m_AngularDamping: 0.05
m_JointFriction: 0.05
m_Immovable: 0
--- !u!1 &1395682910799436
GameObject:
m_ObjectHideFlags: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1654288206095398}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 2, y: 0.66, z: 2}
m_LocalPosition: {x: 0, y: 1, z: 0}
m_LocalScale: {x: 2, y: 0.22000001, z: 2}
m_Father: {fileID: 5577473675271150779}
m_Father: {fileID: 4340471134207970}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &33164921905814718

- component: {fileID: 135746602902751552}
- component: {fileID: 23595512991530936}
- component: {fileID: 4817652318333382091}
- component: {fileID: 5615031093200569454}
m_Layer: 0
m_Name: Goal
m_TagString: Untagged

agent: {fileID: 1395682910799436}
hand: {fileID: 1654288206095398}
goalOn: {fileID: 1065277484498824}
--- !u!54 &5615031093200569454
Rigidbody:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1986879271678326}
serializedVersion: 2
m_Mass: 1
m_Drag: 0
m_AngularDrag: 0.05
m_UseGravity: 0
m_IsKinematic: 1
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
--- !u!1 &1612288809266921535
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 7854106910146316266}
- component: {fileID: 3121518955567099780}
m_Layer: 0
m_Name: HandPart1
m_Name: ArmBone1
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_AngularDamping: 0.05
m_JointFriction: 0.05
m_Immovable: 0
--- !u!1 &2774910285752296781
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5113607647341911964}
- component: {fileID: 6077791648343126729}
m_Layer: 0
m_Name: Gripper
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 0
--- !u!4 &5113607647341911964
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2774910285752296781}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 5577473675271150779}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!171741748 &6077791648343126729
ArticulationBody:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2774910285752296781}
m_Enabled: 1
m_Mass: 1
m_ParentAnchorPosition: {x: 0, y: 0, z: 0}
m_ParentAnchorRotation: {x: 0, y: 0, z: 0.70710677, w: 0.70710677}
m_AnchorPosition: {x: 0, y: 0, z: 0}
m_AnchorRotation: {x: 0, y: 0, z: 0.70710677, w: 0.70710677}
m_ComputeParentAnchor: 1
m_ArticulationJointType: 0
m_LinearX: 0
m_LinearY: 0
m_LinearZ: 0
m_Swing1: 0
m_Swing2: 0
m_Twist: 0
m_XDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_YDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_ZDrive:
lowerLimit: 0
upperLimit: 0
stiffness: 0
damping: 0
forceLimit: 3.4028235e+38
target: 0
targetVelocity: 0
m_LinearDamping: 0.05
m_AngularDamping: 0.05
m_JointFriction: 0.05
m_Immovable: 0
--- !u!1 &2828438114980972834
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 5577473675271150779}
- component: {fileID: 3535560371474816925}
m_Layer: 0
m_Name: HandPart2
m_Name: ArmBone2
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_LocalPosition: {x: 0, y: -6, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 4910003592613346}
- {fileID: 5113607647341911964}
m_Father: {fileID: 7854106910146316266}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 180, y: 0, z: 0}

70
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacher.unity


propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 1685534645273415020, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorRotation.z
value: 0.7071067
objectReference: {fileID: 0}
- target: {fileID: 1685534645273415020, guid: 142b1673ab3eef5f098717cab90c53bf,
type: 3}
propertyPath: m_ParentAnchorRotation.w
value: 0.7071067
propertyPath: m_ParentAnchorPosition.y
value: 0
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
value: -0.00000047683716
value: 0
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
value: 0.7071067
value: 0.70710677
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
value: 0.7071067
value: 0.70710677
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorPosition.y
propertyPath: m_ParentAnchorPosition.z
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorPosition.z
value: 0
propertyPath: m_ParentAnchorRotation.x
value: 0.70710677
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
type: 3}
propertyPath: m_ParentAnchorRotation.y
value: -0.70710677
objectReference: {fileID: 0}
- target: {fileID: 7502585849089161407, guid: 142b1673ab3eef5f098717cab90c53bf,
value: 0.7071067
value: 0.70710677
- target: {fileID: 3121518955567099780, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 7502585849089161407, guid: 142b1673ab3eef5f098717cab90c53bf,
value: 0.7071067
value: 0.70710677
value: 0.7071067
value: 0.70710677
value: 0.7071067
value: 0.70710677
objectReference: {fileID: 0}
- target: {fileID: 6077791648343126729, guid: 142b1673ab3eef5f098717cab90c53bf,
type: 3}

- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 1685534645273415020, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorPosition.z
value: 0.00000047683716
propertyPath: m_ParentAnchorRotation.z
value: 0.7071067
- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 1685534645273415020, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorRotation.x
propertyPath: m_ParentAnchorRotation.w
- target: {fileID: 3535560371474816925, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
propertyPath: m_ParentAnchorRotation.y
value: -0.7071067
propertyPath: m_ParentAnchorPosition.z
value: -0.00000047683716
- target: {fileID: 7502585849089161407, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 7502585849089161407, guid: 142b1673ab3eef5f098717cab90c53bf,
- target: {fileID: 8210020804941752176, guid: 142b1673ab3eef5f098717cab90c53bf,
type: 3}
propertyPath: m_ParentAnchorRotation.w
value: 0.7071067

986
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/ArticulatedReacherManualControl.unity
文件差异内容过多而无法显示
查看文件

43
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ArticulatedReacherAgent.cs


/// </summary>
public override void CollectObservations()
{
AddVectorObs(pendulumA.transform.localPosition);
Vector3 pendulumAPosToLocalSpace = gameObject.transform.InverseTransformPoint(pendulumA.transform.position);
AddVectorObs(pendulumAPosToLocalSpace);
AddVectorObs(m_RbA.angularVelocity);
AddVectorObs(m_RbA.velocity);
// Below resulted in 1.691 after 1 M steps
AddVectorObs(gameObject.transform.InverseTransformVector(m_RbA.angularVelocity));
AddVectorObs(gameObject.transform.InverseTransformVector(m_RbA.velocity));
// Below resulted in 0.0732 after 1 M steps, not learning
//AddVectorObs(m_RbA.angularVelocity);
//AddVectorObs(m_RbA.velocity);
AddVectorObs(pendulumB.transform.localPosition);
Vector3 pendulumBPosToLocalSpace = gameObject.transform.InverseTransformPoint(pendulumB.transform.position);
AddVectorObs(pendulumBPosToLocalSpace);
AddVectorObs(m_RbB.angularVelocity);
AddVectorObs(m_RbB.velocity);
// Below resulted in 1.691 after 1 M steps
AddVectorObs(gameObject.transform.InverseTransformVector(m_RbB.angularVelocity));
AddVectorObs(gameObject.transform.InverseTransformVector(m_RbB.velocity));
// Below resulted in 0.0732 after 1 M steps, not learning
//AddVectorObs(m_RbB.angularVelocity);
//AddVectorObs(m_RbB.velocity);
AddVectorObs(goal.transform.localPosition);
AddVectorObs(hand.transform.localPosition);
Vector3 goalPosToLocalSpace = gameObject.transform.InverseTransformPoint(goal.transform.position);
AddVectorObs(goalPosToLocalSpace);
AddVectorObs(m_GoalSpeed);
Vector3 handPosToLocalSpace = gameObject.transform.InverseTransformPoint(hand.transform.position);
AddVectorObs(handPosToLocalSpace);
//AddVectorObs(m_GoalSpeed);
// Below resulted in 4.18 after 1 M steps and reached 37.52 after 1.25 M steps
AddVectorObs(Vector3.Distance(goalPosToLocalSpace, handPosToLocalSpace));
/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.

m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();
float maxTorque = 150f;
//float maxTorque = 150f;
float maxTorque = 250f;
var torqueX = Mathf.Clamp(vectorAction[0], -1f, 1f) * maxTorque;
var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * maxTorque;

m_Deviation = m_MyAcademy.resetParameters["deviation"];
m_DeviationFreq = m_MyAcademy.resetParameters["deviation_freq"];
}
}

62
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ManualControlReacherArm.cs


public GameObject pendulumB;
public GameObject hand;
public GameObject goal;
private ArticulationBody m_RbA;
private ArticulationBody m_RbB;
private ArticulationBody m_AbA;
private ArticulationBody m_AbB;
private Rigidbody m_RbA;
private Rigidbody m_RbB;
public bool useAlternativeKeySetForInput = false;
public bool useArticulations = false;
/// <summary>
/// Collect the rigidbodies of the reacher in order to reuse them for
/// observations and actions.

m_RbA = pendulumA.GetComponent<ArticulationBody>();
m_RbB = pendulumB.GetComponent<ArticulationBody>();
if (useArticulations)
{
m_AbA = pendulumA.GetComponent<ArticulationBody>();
m_AbB = pendulumB.GetComponent<ArticulationBody>();
}
else
{
m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();
}
m_TorqueA = m_TorqueB = Vector3.zero;
}

public void FixedUpdate()
{
//float maxTorque = 1000f;
float deltaTorque = 25f;
float deltaTorque = 150f;
m_TorqueB = Vector3.zero;
if (useAlternativeKeySetForInput && !Input.GetKey(KeyCode.RightBracket))
return;
// upper arm
if (Input.GetKey(KeyCode.A))
m_TorqueA.x += deltaTorque;
if (Input.GetKey(KeyCode.Z))

if (Input.GetKey(KeyCode.C))
m_TorqueA.z -= deltaTorque;
//m_TorqueA.x = Mathf.Clamp(m_TorqueA.x, -1.0f, 1.0f);
//m_TorqueA.y = Mathf.Clamp(m_TorqueA.y, -1.0f, 1.0f);
//m_TorqueA.z = Mathf.Clamp(m_TorqueA.z, -1.0f, 1.0f);
m_RbA.AddTorque(m_TorqueA);
m_TorqueB = Vector3.zero;
// lower arm
if (Input.GetKey(KeyCode.F))
m_TorqueB.x += deltaTorque;
if (Input.GetKey(KeyCode.V))

if (Input.GetKey(KeyCode.H))
m_TorqueB.z += deltaTorque;
if (Input.GetKey(KeyCode.N))
m_TorqueB.z -= deltaTorque;
m_TorqueB.z -= deltaTorque;
//m_TorqueA.x = Mathf.Clamp(m_TorqueA.x, -1.0f, 1.0f);
//m_TorqueA.y = Mathf.Clamp(m_TorqueA.y, -1.0f, 1.0f);
//m_TorqueA.z = Mathf.Clamp(m_TorqueA.z, -1.0f, 1.0f);
if (useArticulations)
{
m_AbA.AddTorque(m_TorqueA);
m_AbB.AddTorque(m_TorqueB);
}
else
{
m_RbA.AddTorque(m_TorqueA);
m_RbB.AddTorque(m_TorqueB);
}
m_RbB.AddTorque(m_TorqueB);
if (Input.GetKey(KeyCode.Escape))
AgentReset();
}

4
config/sac_trainer_config.yaml


ReacherLearning:
normalize: true
time_horizon: 1000
batch_size: 128
batch_size: 2048
max_steps: 2e5
max_steps: 2e6
summary_freq: 3000
HallwayLearning:

2
config/trainer_config.yaml


time_horizon: 1000
batch_size: 2024
buffer_size: 20240
max_steps: 1e6
max_steps: 1e7
summary_freq: 3000
reward_signals:
extrinsic:

正在加载...
取消
保存