浏览代码

reduced maxAngVel, enabled enhanced determinism, cont spec

/hh-develop-ragdoll-testing
HH 5 年前
当前提交
f7e650a6
共有 8 个文件被更改,包括 2186 次插入和 86 次删除
  1. 3
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs
  2. 999
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab
  3. 958
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
  4. 239
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
  5. 2
      Project/ProjectSettings/DynamicsManager.asset
  6. 24
      config/trainer_config.yaml
  7. 36
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs
  8. 11
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs.meta

3
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs


startingPos = t.position,
startingRot = t.rotation
};
bp.rb.maxAngularVelocity = 100;
bp.rb.maxAngularVelocity = 30;
// bp.rb.maxAngularVelocity = 100;
// Add & setup the ground contact script
bp.groundContact = t.GetComponent<GroundContact>();

999
Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab
文件差异内容过多而无法显示
查看文件

958
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
文件差异内容过多而无法显示
查看文件

239
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs


Vector3 oCubePos = hips.position;
oCubePos.y = -.45f;
m_OrientationCube = Instantiate(Resources.Load<GameObject>("OrientationCube"), oCubePos, Quaternion.identity);
m_OrientationCube.transform.SetParent(transform);
m_OrientationCube.transform.SetParent(transform.parent);
UpdateOrientationCube();
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);
m_JdController.SetupBodyPart(chest);

// var angularVelocityRelativeToLookRotationToTarget = m_worldPosMatrix.inverse.MultiplyVector(bp.rb.angularVelocity);
// sensor.AddObservation(angularVelocityRelativeToLookRotationToTarget);
//RELATIVE RB VELOCITIES
sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.angularVelocity));
//RELATIVE RB VELOCITIES --WAS
// sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.velocity));
// sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.angularVelocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity)); //best if cube fixed rot?
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity)); //best if cube fixed rot?
// sensor.AddObservation(bp.rb.velocity - m_JdController.bodyPartsDict[hips].rb.velocity);
// sensor.AddObservation(bp.rb.angularVelocity - m_JdController.bodyPartsDict[hips].rb.angularVelocity);
// sensor.AddObservation(rb.velocity);
// sensor.AddObservation(rb.angularVelocity);
// sensor.AddObservation(bp.rb.velocity);
// sensor.AddObservation(bp.rb.angularVelocity);
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPointUnscaled(bp.rb.position));
// sensor.AddObservation(m_OrientationCube.transform.InverseTransformPointUnscaled(bp.rb.position));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position)); //best
// sensor.AddObservation(hips.InverseTransformPointUnscaled(bp.rb.position));

sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, hips.rotation));
// sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, chest.rotation));
sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, head.rotation));
// m_TargetDirMatrix = Matrix4x4.TRS(Vector3.zero, m_LookRotation, Vector3.one);

// print(m_OrientationCube.transform.rotation.eulerAngles);
// Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
AddReward(
// runForwardTowardsTargetReward
// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+ 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
// .5f //penalize not looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
// .5f //penalize not looking at
+ 0.005f * (head.position.y - shinL.position.y)
+ 0.005f * (head.position.y - shinR.position.y)
// + 0.01f * (head.position.y - shinL.position.y)
// + 0.01f * (head.position.y - shinR.position.y)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
// 6, 9999)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
// 6, 9999)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
);
}

bpDict[forearmL].SetJointStrength(vectorAction[++i]);
bpDict[armR].SetJointStrength(vectorAction[++i]);
bpDict[forearmR].SetJointStrength(vectorAction[++i]);
// print(Vector3.Dot(m_OrientationCube.transform.forward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3)));
// print((Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f);
// print(Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation));
// print(Vector3.Dot(m_OrientationCube.transform.forward, hips.transform.forward));
}
void UpdateOrientationCube()

}
void FixedUpdate()
{
UpdateOrientationCube();
//reward looking at
// float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
// print($"FacingRewardDot {facingReward}");
// float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
// print($"VelRewardDot {velReward}");
// float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
// //Multiplying these amplifies the reward.
// float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
// + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
// float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
// float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
// print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
// print($"Combined {runForwardTowardsTargetReward}");
// float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
// Set reward for this step according to mixture of the following elements.
// a. Velocity alignment with goal direction.
// b. Rotation alignment with goal direction.
// c. Encourage head height.
// d. Discourage head movement.
AddReward(
// runForwardTowardsTargetReward
// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+ 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+ 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
+ 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
);
// void FixedUpdate()
// {
//// UpdateOrientationCube();
// //reward looking at
//// float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//
//// print($"FacingRewardDot {facingReward}");
//// float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
//// print($"VelRewardDot {velReward}");
//// float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
//
//
//
//
//
//// //Multiplying these amplifies the reward.
//// float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//// float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
//// float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
//
//// print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
//
//
//// print($"Combined {runForwardTowardsTargetReward}");
//// float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
// m_WalkDir = target.position - m_OrientationCube.transform.position;
// +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
//// runForwardTowardsTargetReward
//// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
//// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
//// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
// +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
// + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
//
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
//// .5f //penalize not looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
//// .5f //penalize not looking at
//
// + 0.005f * (head.position.y - shinL.position.y)
// + 0.005f * (head.position.y - shinR.position.y)
//// + 0.01f * (head.position.y - shinL.position.y)
//// + 0.01f * (head.position.y - shinR.position.y)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
//// 6, 9999)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
//// 6, 9999)
//// + 0.02f * (head.position.y - hips.position.y)
//// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
//// m_JdController.bodyPartsDict[hips].rb.velocity)
// m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
// AddReward(
// +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
// );
}
//// var handLVel = m_JdController.bodyPartsDict[handL].rb.velocity.magnitude;
//// var handRVel = m_JdController.bodyPartsDict[handR].rb.velocity.magnitude;
//// if (handLVel > 6)
//// {
//// AddReward(-0.005f * handLVel);
//// }
//// if (handRVel > 6)
//// {
//// AddReward(-0.005f * handRVel);
//// }
//
//
//// //SUNDAY VERSION
//// AddReward(
////// runForwardTowardsTargetReward
////// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
////// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
////// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
//// + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
////// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
////// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
////
////
////
////// + 0.02f * (head.position.y - hips.position.y)
////// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
////// m_JdController.bodyPartsDict[hips].rb.velocity)
//// );
//
//
//
//
//// // Set reward for this step according to mixture of the following elements.
//// // a. Velocity alignment with goal direction.
//// // b. Rotation alignment with goal direction.
//// // c. Encourage head height.
//// // d. Discourage head movement.
//// m_WalkDir = target.position - m_OrientationCube.transform.position;
//// AddReward(
//// +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.02f * (head.position.y - hips.position.y)
//// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
//// m_JdController.bodyPartsDict[hips].rb.velocity)
//// );
//// m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
//// AddReward(
//// +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
//// + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
//// + 0.02f * (head.position.y - hips.position.y)
//// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
//// m_JdController.bodyPartsDict[hips].rb.velocity)
//// );
// }
/// <summary>
/// Loop over body parts and reset them to initial conditions.

// transform.rotation = Quaternion.LookRotation(m_WalkDir);
// }
transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationCube();
// UpdateOrientationCube();
// transform.Rotate(Vector3.up, Random.Range(0.0f, 360.0f));

2
Project/ProjectSettings/DynamicsManager.asset


m_Extent: {x: 250, y: 250, z: 250}
m_WorldSubdivisions: 8
m_FrictionType: 0
m_EnableEnhancedDeterminism: 0
m_EnableEnhancedDeterminism: 1
m_EnableUnifiedHeightmaps: 1

24
config/trainer_config.yaml


strength: 1.0
gamma: 0.995
# WalkerDynamic:
# normalize: true
# num_epoch: 8
# time_horizon: 1000
# batch_size: 2048
# buffer_size: 20480
# # time_horizon: 128
# # batch_size: 512 #2048
# # buffer_size: 4096 #20480
# max_steps: 7.5e6
# summary_freq: 30000
# num_layers: 3
# hidden_units: 128
# beta: 1.0e-2
# learning_rate: 1.0e-3
# reward_signals:
# extrinsic:
# strength: 1.0
# gamma: 0.995
# curiosity:
# strength: 0.02
# gamma: 0.99
# encoding_size: 256
WalkerDynamic:
normalize: true
num_epoch: 3

36
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Inspector-triggered helper that walks this object's hierarchy and, for every
/// child carrying a <c>ConfigurableJoint</c>, looks up the mesh components on
/// that same child.
/// NOTE(review): this is still a stub. The components are fetched but no
/// correction is applied to them yet.
/// </summary>
[ExecuteAlways] // Per Unity docs, lets Update run outside play mode as well.
public class MeshSkewFix : MonoBehaviour
{
    /// <summary>
    /// One-shot trigger. When set to true, the next Update performs a single
    /// pass over the hierarchy and resets this flag to false.
    /// </summary>
    public bool fix;

    /// <summary>
    /// Not read by this script at present.
    /// NOTE(review): presumably the intended root of the traversal; confirm.
    /// </summary>
    public GameObject rootGameObject;

    /// <summary>
    /// Runs one pass over the hierarchy when <see cref="fix"/> has been set.
    /// </summary>
    void Update()
    {
        if (!fix)
        {
            return;
        }

        // Clear the trigger first so the pass runs once per request.
        fix = false;

        foreach (var t in GetComponentsInChildren<Transform>())
        {
            var joint = t.GetComponent<ConfigurableJoint>();
            if (!joint)
            {
                // Only children that carry a joint are of interest.
                continue;
            }

            var meshFilter = t.GetComponent<MeshFilter>();
            // Was GetComponent<MeshFilter>() (copy-paste of the line above),
            // which duplicated meshFilter instead of fetching the renderer.
            var meshRend = t.GetComponent<MeshRenderer>();

            // TODO: apply the actual skew correction using meshFilter / meshRend.
        }
    }
}

11
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs.meta


fileFormatVersion: 2
guid: cdd5a22612a4949bbb5672f2dca17d3b
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存