ml-agents/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs


								using System;

								using System.Collections;

								using System.Collections.Generic;

								using Unity.MLAgents;

								using UnityEngine;


								/// <summary>
								/// Tracks a set of named reward signals for the <see cref="Agent"/> attached to the
								/// same GameObject. Each call to <see cref="UpdateReward"/> scales a raw value,
								/// records it per step / per episode / per session, and forwards the scaled value
								/// to the agent via <c>Agent.AddReward</c>.
								/// </summary>
								public class RewardManager : MonoBehaviour
								{
								    /// <summary>
								    /// Bookkeeping for a single named reward signal.
								    /// </summary>
								    [Serializable]
								    public class Reward
								    {
								        /// <summary>Key under which this reward is registered in <c>rewardsDict</c>.</summary>
								        public string rewardKey;

								        /// <summary>Last unscaled value passed to <c>UpdateReward</c> for this key.</summary>
								        public float rawVal;

								        /// <summary>Multiplier applied to the raw value before it is given to the agent.</summary>
								        public float rewardScalar = .01f;

								        /// <summary>Scaled reward applied by the most recent update (0 if that update was NaN).</summary>
								        public float rewardThisStep;

								        /// <summary>Sum of scaled rewards since the last <c>ResetEpisodeRewards</c> call.</summary>
								        public float cumulativeThisEpisode;

								        /// <summary>Sum of scaled rewards since this object was created; never reset here.</summary>
								        public float cumulativeThisSession;

								        /// <summary>Agent step count at which the scaled value was last NaN.</summary>
								        public int lastNaNStep;
								    }

								    // Agent on this GameObject that receives the scaled rewards.
								    private Agent m_thisAgent;

								    /// <summary>Rewards to register by key when the component is enabled.</summary>
								    public List<Reward> rewardsList = new List<Reward>();

								    /// <summary>Lookup from reward key to its bookkeeping entry, filled from <c>rewardsList</c>.</summary>
								    public Dictionary<string, Reward> rewardsDict = new Dictionary<string, Reward>();

								    /// <summary>Copy of the agent's <c>MaxStep</c>, read when the component is enabled.</summary>
								    public float maxSteps;

								    /// <summary>
								    /// Caches the sibling <see cref="Agent"/> and registers every entry of
								    /// <c>rewardsList</c> in <c>rewardsDict</c>. Entries whose key is already present
								    /// (for example after a disable/enable cycle, or a duplicate key) are skipped so
								    /// that the remaining entries are still registered.
								    /// </summary>
								    private void OnEnable()
								    {
								        m_thisAgent = GetComponent<Agent>();
								        if (m_thisAgent == null)
								        {
								            // Without an agent there is nothing to reward; report it clearly
								            // instead of failing with a bare NullReferenceException below.
								            Debug.LogError("RewardManager requires an Agent component on the same GameObject.", this);
								            return;
								        }

								        maxSteps = m_thisAgent.MaxStep;

								        foreach (var item in rewardsList)
								        {
								            // A null entry or null key cannot be used as a dictionary key.
								            if (item == null || item.rewardKey == null)
								            {
								                continue;
								            }

								            // Already registered: skip this one but keep processing the rest.
								            if (rewardsDict.ContainsKey(item.rewardKey))
								            {
								                continue;
								            }

								            rewardsDict.Add(item.rewardKey, item);
								        }
								    }

								    /// <summary>
								    /// Scales <paramref name="rawVal"/> by the reward's <c>rewardScalar</c>, updates the
								    /// per-step and cumulative totals, and adds the scaled value to the agent.
								    /// If the scaled value is NaN, the agent's current step count is stored in
								    /// <c>lastNaNStep</c> and nothing is accumulated or sent to the agent, because a
								    /// single NaN would otherwise turn both cumulative totals into NaN permanently.
								    /// </summary>
								    /// <param name="key">Key of a reward previously registered in <c>rewardsDict</c>.</param>
								    /// <param name="rawVal">Unscaled reward value for this step.</param>
								    /// <exception cref="KeyNotFoundException">
								    /// Thrown when <paramref name="key"/> is not registered.
								    /// </exception>
								    public void UpdateReward(string key, float rawVal)
								    {
								        Reward reward;
								        if (!rewardsDict.TryGetValue(key, out reward))
								        {
								            throw new KeyNotFoundException(
								                "No reward registered with key '" + key + "' on RewardManager.");
								        }

								        reward.rawVal = rawVal;
								        float scaledVal = rawVal * reward.rewardScalar;

								        if (float.IsNaN(scaledVal))
								        {
								            // Remember when it happened, but do not let the NaN reach the totals
								            // or the agent.
								            reward.lastNaNStep = m_thisAgent.StepCount;
								            reward.rewardThisStep = 0f;
								            return;
								        }

								        reward.rewardThisStep = scaledVal;
								        reward.cumulativeThisEpisode += scaledVal;
								        reward.cumulativeThisSession += scaledVal;

								        m_thisAgent.AddReward(scaledVal);
								    }

								    /// <summary>
								    /// Clears the per-step and per-episode values of every registered reward.
								    /// Session totals are left untouched.
								    /// </summary>
								    public void ResetEpisodeRewards()
								    {
								        foreach (var reward in rewardsDict.Values)
								        {
								            reward.rewardThisStep = 0;
								            reward.cumulativeThisEpisode = 0;
								        }
								    }
								}