CollectObservationBodyPart ( bodyPart , sensor ) ;
}
// print(m_OrientationCube.transform.rotation.eulerAngles);
// Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
AddReward (
// runForwardTowardsTargetReward
// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+ 0.01f * Vector3 . Dot ( m_OrientationCube . transform . forward ,
Vector3 . ClampMagnitude ( m_JdController . bodyPartsDict [ hips ] . rb . velocity , 3 ) )
+ 0.01f * Vector3 . Dot ( m_OrientationCube . transform . forward , hips . forward )
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
// .5f //penalize not looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
// .5f //penalize not looking at
+ 0.005f * ( head . position . y - shinL . position . y )
+ 0.005f * ( head . position . y - shinR . position . y )
// + 0.01f * (head.position.y - shinL.position.y)
// + 0.01f * (head.position.y - shinR.position.y)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
// 6, 9999)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
// 6, 9999)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
) ;
//// print(m_OrientationCube.transform.rotation.eulerAngles);
//// Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
// AddReward(
//// runForwardTowardsTargetReward
//// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
//// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
//// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
// +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
// + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
//
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
//// .5f //penalize not looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
//// .5f //penalize not looking at
//
// + 0.005f * (head.position.y - shinL.position.y)
// + 0.005f * (head.position.y - shinR.position.y)
//// + 0.01f * (head.position.y - shinL.position.y)
//// + 0.01f * (head.position.y - shinR.position.y)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
//// 6, 9999)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
//// 6, 9999)
//// + 0.02f * (head.position.y - hips.position.y)
//// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
//// m_JdController.bodyPartsDict[hips].rb.velocity)
// );
}
}
// void FixedUpdate()
// {
//// UpdateOrientationCube();
// //reward looking at
//// float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//
//// print($"FacingRewardDot {facingReward}");
//// float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
//// print($"VelRewardDot {velReward}");
//// float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
//
//
//
//
//
//// //Multiplying these amplifies the reward.
//// float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//// float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
//// float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
//
//// print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
//
//
//// print($"Combined {runForwardTowardsTargetReward}");
//// float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
// // Set reward for this step according to mixture of the following elements.
// // a. Velocity alignment with goal direction.
// // b. Rotation alignment with goal direction.
// // c. Encourage head height.
// // d. Discourage head movement.
/// <summary>
/// Adds this step's shaping reward through a single <c>AddReward</c> call.
/// </summary>
/// <remarks>
/// The reward is the sum of four terms, each evaluated against the current
/// state of the orientation cube and the body-part transforms:
/// <list type="bullet">
/// <item>hips rigidbody velocity (magnitude clamped to 3) projected onto the orientation cube's forward axis, weighted 0.01;</item>
/// <item>hips forward axis projected onto the orientation cube's forward axis, weighted 0.01;</item>
/// <item>head height above the left shin, weighted 0.005;</item>
/// <item>head height above the right shin, weighted 0.005.</item>
/// </list>
/// No head-movement or hand-velocity penalty is applied here.
/// </remarks>
void FixedUpdate()
{
    // Reference direction shared by the velocity term and the facing term.
    var goalForward = m_OrientationCube.transform.forward;

    // Clamp the hips velocity magnitude to 3 so the velocity term is bounded.
    var clampedHipsVelocity = Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3f);

    // World-space head height, compared against each shin below.
    var headHeight = head.position.y;

    // The terms are kept in one expression, in this order, so the float
    // summation is evaluated exactly as a single left-to-right chain.
    AddReward(
        0.01f * Vector3.Dot(goalForward, clampedHipsVelocity)   // moving along the reference direction
        + 0.01f * Vector3.Dot(goalForward, hips.forward)        // hips facing the reference direction
        + 0.005f * (headHeight - shinL.position.y)              // head above left shin
        + 0.005f * (headHeight - shinR.position.y)              // head above right shin
    );
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.