您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
277 行
9.0 KiB
277 行
9.0 KiB
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using UnityEngine;
|
|
|
|
|
|
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#agent")]
/** Generic functions for parent Agent class.
 * Contains all logic for Brain-Agent communication and Agent-Environment
 * interaction.
 *
 * Lifecycle as implemented in this class:
 *  - OnEnable() assigns the id, subscribes to the brain (if any), allocates
 *    the state buffers and then calls InitializeAgent().
 *  - OnDisable() unsubscribes from the brain (if any).
 *  - The remaining public methods (Step, Reset, Collect*, ...) are plain
 *    accessors/mutators that are meant to be driven by the brain.
 */
public abstract class Agent : MonoBehaviour
{
    [Tooltip("The brain to register this agent to. Can be dragged into the inspector using the Editor.")]
    /**< \brief The brain that will control this agent. */
    /**< Use the inspector to drag the desired brain gameObject into
     * the Brain field */
    public Brain brain;

    [Tooltip("A list of Cameras which will be used to generate observations.")]
    /**< \brief The list of the cameras the Agent uses as observations. */
    /**< These cameras will be used to generate the observations */
    public List<Camera> observations;

    [Tooltip("The per-agent maximum number of steps.")]
    /**< \brief The number of steps the agent takes before being done. */
    /**< If set to 0, the agent can only be set to done via a script.
     * If set to any positive integer, the agent will be set to done after that
     * many steps each episode. */
    public int maxStep;

    [Tooltip("If checked, the agent will reset on done. Else, AgentOnDone() will be called.")]
    /**< \brief Determines the behaviour of the Agent when done.*/
    /**< If true, the agent will reset when done.
     * If not, the agent will remain done, and no longer take actions.*/
    public bool resetOnDone = true;

    // State list for the agent (cleared and refilled by ClearAndCollectState()).
    [HideInInspector]
    public List<float> state;

    // Concatenation of the last n states; its nominal length is
    // stateSize * stackedStates as read from the brain parameters.
    [HideInInspector]
    public List<float> stackedStates;

    /**< \brief Describes the reward for the given step of the agent.*/
    /**< It is reset to 0 at the beginning of every step.
     * Modify in AgentStep().
     * Should be set to a positive value to reinforce desired behavior, and
     * set to a negative value to punish undesirable behavior.
     * Additionally, the magnitude of the reward should not exceed 1.0 */
    [HideInInspector]
    public float reward;

    /**< \brief Whether or not the agent is done*/
    /**< Set to true when the agent has acted in some way which ends the
     * episode for the given agent. */
    [HideInInspector]
    public bool done;

    /**< \brief Whether or not the max step is reached*/
    [HideInInspector]
    public bool maxStepReached;

    /**< \brief The current value estimate of the agent */
    /**< When using an External brain, you can pass value estimates to the
     * agent at every step using env.Step(actions, values).
     * If AgentMonitor is attached to the Agent, this value will be displayed.*/
    [HideInInspector]
    public float value;

    /**< \brief Do not modify: This keeps track of the cumulative reward.*/
    [HideInInspector]
    public float CumulativeReward;

    /**< \brief Do not modify: This keeps track of the number of steps taken by
     * the agent each episode.*/
    [HideInInspector]
    public int stepCounter;

    /**< \brief Do not modify: This keeps track of the last actions decided by
     * the brain.*/
    [HideInInspector]
    public float[] agentStoredAction;

    /**< \brief Do not modify directly: This is used by the brain to store
     * information about the previous states of the agent*/
    [HideInInspector]
    public float[] memory;

    /**< \brief Do not modify : This is the unique Identifier each agent
     * receives at initialization. It is used by the brain to identify
     * the agent.*/
    [HideInInspector]
    public int id;

    /// Unity callback: assigns the id, subscribes to the brain and
    /// initializes the agent.
    /** An agent without a brain is legal here: it is simply not subscribed
     * and no buffers are allocated for it. */
    void OnEnable()
    {
        id = gameObject.GetInstanceID();
        if (brain != null)
        {
            SubscribeToBrain();
        }

        // Allocate the state buffers before the virtual hook runs so that an
        // override which does not call base.InitializeAgent() still leaves
        // the agent in a usable state.
        EnsureStateBuffers();
        InitializeAgent();
    }

    /// Unity callback: removes the agent from the list of agents of the brain.
    void OnDisable()
    {
        // Goes through RemoveBrain() so that an agent that never had a brain
        // can be disabled without a NullReferenceException.
        RemoveBrain();
    }

    /// When GiveBrain is called, the agent unsubscribes from its
    /// previous brain and subscribes to the one passed in argument.
    /** Use this method to provide a brain to the agent via script.
     * Do not modify brain directly.
     * Passing null only unsubscribes the agent and leaves it without a brain.
     * @param b The Brain component the agent will subscribe to.*/
    public void GiveBrain(Brain b)
    {
        RemoveBrain();
        brain = b;
        if (brain == null)
        {
            return;
        }

        SubscribeToBrain();

        // The new brain may use a different state size; the stacked buffer is
        // rebuilt only when its length no longer matches, so the history is
        // kept when the sizes are identical.
        EnsureStateBuffers();
    }

    /// When RemoveBrain is called, the agent unsubscribes from its brain.
    /** Use this method to remove the brain of an agent via script.
     * Do not modify brain directly.
     * Note that the brain field itself is left unchanged; only the
     * registration in the brain's agent collection is removed.
     * If an agent does not have a brain, it will not update its actions.*/
    public void RemoveBrain()
    {
        if (brain != null)
        {
            brain.agents.Remove(id);
        }
    }

    /// Initialize the agent with this method
    /** Can be overridden in the agent-specific child class.
     * This method is called by OnEnable(), i.e. every time the agent is
     * enabled.
     * The base implementation (re)allocates the state buffers from the brain
     * parameters and does nothing when the agent has no brain.
     */
    public virtual void InitializeAgent()
    {
        if (brain == null)
        {
            return;
        }

        int stateSize = brain.brainParameters.stateSize;
        int stackedSize = stateSize * brain.brainParameters.stackedStates;

        state = new List<float>(stateSize);
        stackedStates = new List<float>(stackedSize);
        // Start with an all-zero history of the nominal length.
        stackedStates.AddRange(new float[stackedSize]);
    }

    /// Do not modify : Collects the current state and pushes it on the stack
    /// of the last states.
    /** Clears the state list, calls CollectState(), drops the oldest
     * stateSize entries of the stacked states and appends the content of
     * the state list.
     * Note : the value returned by CollectState() is not used; the override
     * is expected to fill the state field.
     * @returns The stacked states list.
     */
    public List<float> ClearAndCollectState()
    {
        state.Clear();
        CollectState();

        // Sliding window: remove the oldest state, append the newest one.
        stackedStates.RemoveRange(0, brain.brainParameters.stateSize);
        stackedStates.AddRange(state);
        return stackedStates;
    }

    /// Collect the states of the agent with this method
    /** Must be implemented in agent-specific child class.
     * This method is called at every step and collects the state of the agent.
     * The length of the output must be the same length as the state size field
     * in the brain parameters of the brain the agent subscribes to.
     * Note : The order of the elements in the state list is important.
     * @returns state A list of floats corresponding to the state of the agent.
     */
    public virtual List<float> CollectState()
    {
        return state;
    }

    /// Defines agent-specific behavior at every step depending on the action.
    /** Must be implemented in agent-specific child class.
     * Note: If your state is discrete, you need to convert your
     * state into a list of float with length 1.
     * @param action The action the agent receives from the brain.
     */
    public virtual void AgentStep(float[] action)
    {
    }

    /// Defines agent-specific behaviour when done
    /** Must be implemented in agent-specific child class.
     * Is called when the Agent is done if resetOnDone is false.
     * The agent will remain done.
     * You can use this method to remove the agent from the scene.
     */
    public virtual void AgentOnDone()
    {
    }

    /// Defines agent-specific reset logic
    /** Must be implemented in agent-specific child class.
     * Is called when the academy is done.
     * Is called when the Agent is done if resetOnDone is true.
     */
    public virtual void AgentReset()
    {
    }

    /// Do not modify : Is used by the brain to reset the agent.
    /** Clears the memory and the stacked states, resets the step counter and
     * calls AgentReset(). */
    public void Reset()
    {
        memory = new float[brain.brainParameters.memorySize];
        stackedStates.Clear();
        stackedStates.AddRange(new float[brain.brainParameters.stateSize * brain.brainParameters.stackedStates]);
        stepCounter = 0;
        AgentReset();
        // NOTE(review): presumably pre-compensates the next
        // SetCumulativeReward() call, which adds the current reward again -
        // confirm against the brain's call order.
        CumulativeReward = -reward;
    }

    /// Do not modify : Is used by the brain to collect rewards.
    public float CollectReward()
    {
        return reward;
    }

    /// Do not modify : Adds the current reward to the cumulative reward, or
    /// sets the cumulative reward back to 0 when the agent is done.
    public void SetCumulativeReward()
    {
        if (done)
        {
            CumulativeReward = 0f;
        }
        else
        {
            CumulativeReward += reward;
        }
    }

    /// Do not modify : Is used by the brain to collect done.
    public bool CollectDone()
    {
        return done;
    }

    /// Do not modify : Is used by the brain to give a new action to the agent.
    /** The array is stored by reference, it is not copied. */
    public void UpdateAction(float[] a)
    {
        agentStoredAction = a;
    }

    /// Do not modify : Is used by the brain to make the agent perform a step.
    /** Calls AgentStep() with the stored action, increments the step counter
     * and flags the agent as done once maxStep steps were taken (only when
     * maxStep is positive). */
    public void Step()
    {
        AgentStep(agentStoredAction);
        stepCounter += 1;
        // ">=" and not ">": the counter was already incremented for the step
        // that just ran, so the agent is done after exactly maxStep steps.
        if ((maxStep > 0) && (stepCounter >= maxStep))
        {
            done = true;
            maxStepReached = true;
        }
    }

    /// Do not modify : Is used by the brain to reset the Reward.
    public void ResetReward()
    {
        reward = 0;
    }

    /// Registers this agent with the current brain and sizes the action and
    /// memory buffers from the brain parameters. The brain must not be null.
    private void SubscribeToBrain()
    {
        brain.agents.Add(id, this);

        // Continuous action spaces use one float per action dimension,
        // any other action space type uses a single float.
        int actionLength = 1;
        if (brain.brainParameters.actionSpaceType == StateType.continuous)
        {
            actionLength = brain.brainParameters.actionSize;
        }
        agentStoredAction = new float[actionLength];
        memory = new float[brain.brainParameters.memorySize];
    }

    /// Makes sure the state lists exist and that the stacked states have the
    /// length required by the current brain. Does nothing without a brain.
    private void EnsureStateBuffers()
    {
        if (brain == null)
        {
            return;
        }

        int stateSize = brain.brainParameters.stateSize;
        int stackedSize = stateSize * brain.brainParameters.stackedStates;

        if (state == null)
        {
            state = new List<float>(stateSize);
        }

        if (stackedStates == null || stackedStates.Count != stackedSize)
        {
            stackedStates = new List<float>(stackedSize);
            stackedStates.AddRange(new float[stackedSize]);
        }
    }
}
|