ml-agents/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


								using System;

								using MLAgentsExamples;

								using UnityEngine;

								using Unity.MLAgents;

								using Unity.MLAgentsExamples;

								using Unity.MLAgents.Sensors;

								using BodyPart = Unity.MLAgentsExamples.BodyPart;

								using Random = UnityEngine.Random;


								/// <summary>
								/// Ragdoll agent that is rewarded for moving its body parts, on average, at
								/// <see cref="walkingSpeed"/> along a goal direction while keeping its head facing that direction.
								/// </summary>
								public class WalkerAgent : Agent
								{
								    // Smallest speed used to normalise the velocity-matching curve.
								    // walkingSpeed may legitimately be 0 (inspector range starts at 0 and the per-episode
								    // random sample includes 0); dividing by it would yield NaN rewards and observations.
								    const float k_MinSpeedForNormalization = 0.1f;

								    [Header("Walk Speed")] [Range(0, 15)] public float walkingSpeed = 15; //The walking speed to try and achieve
								    float m_maxWalkingSpeed = 15; //The max walking speed

								    //Should the agent sample a new goal velocity each episode?
								    //If true, walkingSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
								    //If false, the goal velocity will be walkingSpeed
								    public bool randomizeWalkSpeedEachEpisode;

								    /// <summary>
								    /// How the goal walking direction is chosen.
								    /// </summary>
								    public enum WalkDirectionMethod
								    {
								        //Walk along worldDirToWalk
								        UseWorldDirection,

								        //Walk from the hips towards the target transform
								        UseTarget
								    }

								    [Header("Walk Direction")] public WalkDirectionMethod walkDirectionMethod;

								    //Goal direction in world space. Overwritten every physics step when walkDirectionMethod is UseTarget.
								    public Vector3 worldDirToWalk = Vector3.right;
								    public Transform target; //Target the agent will walk towards.

								    [Header("Body Parts")] public Transform hips;
								    public Transform chest;
								    public Transform spine;
								    public Transform head;
								    public Transform thighL;
								    public Transform shinL;
								    public Transform footL;
								    public Transform thighR;
								    public Transform shinR;
								    public Transform footR;
								    public Transform armL;
								    public Transform forearmL;
								    public Transform handL;
								    public Transform armR;
								    public Transform forearmR;
								    public Transform handR;

								    //This will be used as a stabilized model space reference point for observations
								    //Because ragdolls can move erratically during training, using a stabilized reference transform improves learning
								    OrientationCubeController m_OrientationCube;
								    DirectionIndicator m_DirectionIndicator;
								    JointDriveController m_JdController;

								    EnvironmentParameters m_ResetParams;

								    /// <summary>
								    /// Cache helper components, register every body part with the joint drive controller
								    /// and apply the initial reset parameters.
								    /// </summary>
								    public override void Initialize()
								    {
								        m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
								        m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();

								        //Setup each body part
								        m_JdController = GetComponent<JointDriveController>();
								        m_JdController.SetupBodyPart(hips);
								        m_JdController.SetupBodyPart(chest);
								        m_JdController.SetupBodyPart(spine);
								        m_JdController.SetupBodyPart(head);
								        m_JdController.SetupBodyPart(thighL);
								        m_JdController.SetupBodyPart(shinL);
								        m_JdController.SetupBodyPart(footL);
								        m_JdController.SetupBodyPart(thighR);
								        m_JdController.SetupBodyPart(shinR);
								        m_JdController.SetupBodyPart(footR);
								        m_JdController.SetupBodyPart(armL);
								        m_JdController.SetupBodyPart(forearmL);
								        m_JdController.SetupBodyPart(handL);
								        m_JdController.SetupBodyPart(armR);
								        m_JdController.SetupBodyPart(forearmR);
								        m_JdController.SetupBodyPart(handR);

								        m_ResetParams = Academy.Instance.EnvironmentParameters;

								        SetResetParameters();
								    }

								    /// <summary>
								    /// Reset every body part, give the hips a random yaw, refresh the orientation helpers,
								    /// clear the episode rewards, optionally sample a new goal speed and re-apply the reset parameters.
								    /// </summary>
								    public override void OnEpisodeBegin()
								    {
								        //Reset all of the body parts
								        foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
								        {
								            bodyPart.Reset(bodyPart);
								        }

								        //Random start rotation to help generalize
								        hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);

								        UpdateOrientationObjects();

								        rewardManager.ResetEpisodeRewards();

								        //Set our goal walking speed (left untouched when randomization is off)
								        if (randomizeWalkSpeedEachEpisode)
								        {
								            walkingSpeed = Random.Range(0.0f, m_maxWalkingSpeed);
								        }

								        SetResetParameters();
								    }

								    /// <summary>
								    /// Add relevant information on each body part to observations.
								    /// </summary>
								    /// <param name="bp">Body part to observe.</param>
								    /// <param name="sensor">Sensor the observations are appended to.</param>
								    public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
								    {
								        //GROUND CHECK
								        sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground

								        //Get velocities in the context of our orientation cube's space
								        //Note: You can get these velocities in world space as well but it may not train as well.
								        var cubeTransform = m_OrientationCube.transform;
								        sensor.AddObservation(cubeTransform.InverseTransformDirection(bp.rb.velocity));
								        sensor.AddObservation(cubeTransform.InverseTransformDirection(bp.rb.angularVelocity));

								        //Get position relative to hips in the context of our orientation cube's space
								        sensor.AddObservation(cubeTransform.InverseTransformDirection(bp.rb.position - hips.position));

								        //Hips and hands are skipped here: they are also the parts that receive no
								        //rotation or strength actions in OnActionReceived.
								        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
								        {
								            sensor.AddObservation(bp.rb.transform.localRotation);
								            sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
								        }
								    }

								    /// <summary>
								    /// Add the goal-related observations, then loop over body parts to add them to observation.
								    /// </summary>
								    /// <param name="sensor">Sensor the observations are appended to.</param>
								    public override void CollectObservations(VectorSensor sensor)
								    {
								        var cubeForward = m_OrientationCube.transform.forward;

								        //current ragdoll velocity. normalized
								        sensor.AddObservation(GetMatchingVelocityInverseLerp(cubeForward * walkingSpeed, GetAvgVelocity()));

								        //current speed goal. normalized.
								        sensor.AddObservation(walkingSpeed / m_maxWalkingSpeed);

								        //rotation deltas between hips/head facing and the goal direction
								        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
								        sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));

								        //Point to walk towards: the target itself, or a point 100 units ahead along the goal direction
								        Vector3 targetPos = walkDirectionMethod == WalkDirectionMethod.UseTarget
								            ? target.transform.position
								            : m_OrientationCube.transform.position + (cubeForward * 100);
								        targetPos.y = 0;

								        //Express it in the orientation cube's space and cap its length at 100
								        Vector3 relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(targetPos), 100);
								        sensor.AddObservation(relPos);

								        foreach (var bodyPart in m_JdController.bodyPartsList)
								        {
								            CollectObservationBodyPart(bodyPart, sensor);
								        }
								    }

								    /// <summary>
								    /// Apply the policy output to the joints. Values are consumed strictly in order:
								    /// first 26 joint target rotation values, then 13 joint strength values (39 in total).
								    /// </summary>
								    /// <param name="vectorAction">Continuous actions; must hold at least 39 values.</param>
								    public override void OnActionReceived(float[] vectorAction)
								    {
								        var bpDict = m_JdController.bodyPartsDict;
								        var i = -1;

								        //NOTE: the order below defines the action layout; do not reorder.
								        bpDict[chest].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
								        bpDict[spine].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);

								        bpDict[thighL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
								        bpDict[thighR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
								        bpDict[shinL].SetJointTargetRotation(vectorAction[++i], 0, 0);
								        bpDict[shinR].SetJointTargetRotation(vectorAction[++i], 0, 0);
								        bpDict[footR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
								        bpDict[footL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);

								        bpDict[armL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
								        bpDict[armR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
								        bpDict[forearmL].SetJointTargetRotation(vectorAction[++i], 0, 0);
								        bpDict[forearmR].SetJointTargetRotation(vectorAction[++i], 0, 0);
								        bpDict[head].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);

								        //update joint strength settings
								        bpDict[chest].SetJointStrength(vectorAction[++i]);
								        bpDict[spine].SetJointStrength(vectorAction[++i]);
								        bpDict[head].SetJointStrength(vectorAction[++i]);
								        bpDict[thighL].SetJointStrength(vectorAction[++i]);
								        bpDict[shinL].SetJointStrength(vectorAction[++i]);
								        bpDict[footL].SetJointStrength(vectorAction[++i]);
								        bpDict[thighR].SetJointStrength(vectorAction[++i]);
								        bpDict[shinR].SetJointStrength(vectorAction[++i]);
								        bpDict[footR].SetJointStrength(vectorAction[++i]);
								        bpDict[armL].SetJointStrength(vectorAction[++i]);
								        bpDict[forearmL].SetJointStrength(vectorAction[++i]);
								        bpDict[armR].SetJointStrength(vectorAction[++i]);
								        bpDict[forearmR].SetJointStrength(vectorAction[++i]);
								    }

								    //Update OrientationCube and DirectionIndicator
								    //In UseTarget mode worldDirToWalk is replaced by the vector from the hips to the target.
								    void UpdateOrientationObjects()
								    {
								        if (walkDirectionMethod == WalkDirectionMethod.UseTarget)
								        {
								            worldDirToWalk = target.position - hips.position;
								        }

								        m_OrientationCube.UpdateOrientation(hips.position, worldDirToWalk);
								        m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
								    }

								    /// <summary>
								    /// Every physics step: refresh the orientation helpers and publish the step rewards.
								    /// </summary>
								    void FixedUpdate()
								    {
								        UpdateOrientationObjects();
								        var cubeForward = m_OrientationCube.transform.forward;

								        // Set reward for this step according to mixture of the following elements.
								        // a. Match target speed
								        //This reward will approach 1 if it matches perfectly and approach zero as it deviates
								        var matchSpeedReward = GetMatchingVelocityInverseLerp(cubeForward * walkingSpeed, GetAvgVelocity());

								        // b. Rotation alignment with target direction.
								        //This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
								        var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;

								        rewardManager.rewardsDict["matchSpeed"].rewardThisStep = matchSpeedReward;
								        rewardManager.rewardsDict["lookAtTarget"].rewardThisStep = lookAtTargetReward;

								        //Product (not sum) so that both terms have to be high for the combined reward to be high
								        rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward);
								    }

								    //Returns the average velocity of all the rigidbodies
								    //(Vector3.zero when no body part is registered, instead of dividing by zero)
								    Vector3 GetAvgVelocity()
								    {
								        var velSum = Vector3.zero;

								        //ALL RBS
								        var numOfRB = 0;
								        foreach (var item in m_JdController.bodyPartsList)
								        {
								            numOfRB++;
								            velSum += item.rb.velocity;
								        }

								        if (numOfRB == 0)
								        {
								            return Vector3.zero;
								        }

								        return velSum / numOfRB;
								    }

								    public RewardManager rewardManager;

								    /// <summary>
								    /// Normalized value of the difference in avg speed vs goal walking speed.
								    /// </summary>
								    /// <param name="velocityGoal">Velocity the agent should have.</param>
								    /// <param name="actualVelocity">Velocity the agent currently has.</param>
								    /// <returns>
								    /// 1 when the velocities are equal, falling to 0 once they differ by the goal speed or more.
								    /// </returns>
								    public float GetMatchingVelocityInverseLerp(Vector3 velocityGoal, Vector3 actualVelocity)
								    {
								        //Scale used to normalise the error. Floored so that a goal speed of 0 does not
								        //produce 0 / 0 = NaN; identical to walkingSpeed for every speed >= the floor.
								        var speedScale = Mathf.Max(walkingSpeed, k_MinSpeedForNormalization);

								        //distance between our actual velocity and goal velocity
								        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, speedScale);

								        //get the value on a declining sigmoid shaped curve that decays from 1 to 0
								        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / speedScale, 2), 2);
								    }

								    /// <summary>
								    /// Agent touched the target
								    /// </summary>
								    public void TouchedTarget()
								    {
								        AddReward(1f);
								    }

								    /// <summary>
								    /// Set chest, spine and hips mass from the "chest_mass", "spine_mass" and "hip_mass"
								    /// environment parameters, each defaulting to 8.
								    /// </summary>
								    public void SetTorsoMass()
								    {
								        var bodyParts = m_JdController.bodyPartsDict;
								        bodyParts[chest].rb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
								        bodyParts[spine].rb.mass = m_ResetParams.GetWithDefault("spine_mass", 8);
								        bodyParts[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
								    }

								    /// <summary>
								    /// Apply all environment-parameter driven settings (currently only the torso masses).
								    /// </summary>
								    public void SetResetParameters()
								    {
								        SetTorsoMass();
								    }
								}