
Improve memory management (#180)

* More efficiently allocate memory when sending states

* Code clean-up

* Additional changes

* More GC reduction

* Remove state list initialization from example environments

* Use built-in json tool to serialize state message

* Remove commented code

* Use more efficient CompareTag

* Comments before code

* Use type inference where appropriate
Branch: develop-generalizationTraining-TrainerController
GitHub · 7 years ago
Current commit: 59a2bbe0
20 files changed, with 280 insertions and 235 deletions
  1. python/unityagents/environment.py (1 change)
  2. unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (1 change)
  3. unity-environment/Assets/ML-Agents/Examples/Area/Scripts/AreaAgent.cs (2 changes)
  4. unity-environment/Assets/ML-Agents/Examples/Area/Scripts/Push/PushAgent.cs (13 changes)
  5. unity-environment/Assets/ML-Agents/Examples/Area/Scripts/Wall/WallAgent.cs (1 change)
  6. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (1 change)
  7. unity-environment/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgentConfigurable.cs (1 change)
  8. unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (7 changes)
  9. unity-environment/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (1 change)
  10. unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (1 change)
  11. unity-environment/Assets/ML-Agents/Scripts/Academy.cs (46 changes)
  12. unity-environment/Assets/ML-Agents/Scripts/Agent.cs (64 changes)
  13. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (121 changes)
  14. unity-environment/Assets/ML-Agents/Scripts/Communicator.cs (20 changes)
  15. unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs (3 changes)
  16. unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs (8 changes)
  17. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (32 changes)
  18. unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs (14 changes)
  19. unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (159 changes)
  20. unity-environment/Assets/ML-Agents/Scripts/Monitor.cs (19 changes)

python/unityagents/environment.py (1 change)


"The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
"{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
"of ML-Agents.".format(self._python_api, self._unity_api))
self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]

unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add(gameObject.transform.rotation.z);
state.Add(gameObject.transform.rotation.x);
state.Add((ball.transform.position.x - gameObject.transform.position.x));
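The local List<float> allocation above is what this commit removes from every example agent: per the commit list, state list initialization moves out of the examples and into the base Agent. A minimal sketch of the post-change shape, assuming the inherited, pre-allocated `state` list (see Agent.cs below) is what CollectState now fills:

// Hypothetical post-change CollectState() for Ball3DAgent: no per-step
// list allocation; the base class clears and reuses `state`.
public override List<float> CollectState()
{
    state.Add(gameObject.transform.rotation.z);
    state.Add(gameObject.transform.rotation.x);
    state.Add(ball.transform.position.x - gameObject.transform.position.x);
    return state;
}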

unity-environment/Assets/ML-Agents/Examples/Area/Scripts/AreaAgent.cs (2 changes)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add((transform.position.x - area.transform.position.x));
state.Add((transform.position.y - area.transform.position.y));
state.Add((transform.position.z + 5 - area.transform.position.z));

unity-environment/Assets/ML-Agents/Examples/Area/Scripts/Push/PushAgent.cs (13 changes)


public GameObject goalHolder;
public GameObject block;
+ Rigidbody rb;
+ Vector3 velocity;
+ Vector3 blockVelocity;
+ rb = GetComponent<Rigidbody>();
- List<float> state = new List<float>();
- Vector3 velocity = GetComponent<Rigidbody>().velocity;
- Vector3 blockVelocity = block.GetComponent<Rigidbody>().velocity;
+ velocity = rb.velocity;
+ blockVelocity = block.GetComponent<Rigidbody>().velocity;
state.Add((transform.position.x - area.transform.position.x));
state.Add((transform.position.y - area.transform.position.y));
state.Add((transform.position.z + 5 - area.transform.position.z));

{
float xVariation = GameObject.Find("Academy").GetComponent<PushAcademy>().xVariation;
transform.position = new Vector3(Random.Range(-xVariation, xVariation), 1.1f, -8f) + area.transform.position;
- GetComponent<Rigidbody>().velocity = new Vector3(0f, 0f, 0f);
+ rb.velocity = new Vector3(0f, 0f, 0f);
area.GetComponent<Area>().ResetArea();
}
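The hunk above trades per-step GetComponent<Rigidbody>() lookups for a field resolved once. A self-contained sketch of the caching pattern, assuming the one-time lookup happens in Awake (the diff does not show where `rb = GetComponent<Rigidbody>();` lands):

using UnityEngine;

// Hypothetical illustration of the component-caching pattern used by PushAgent.
public class CachedRigidbodyAgent : MonoBehaviour
{
    Rigidbody rb;       // cached reference, looked up once
    Vector3 velocity;   // reused field instead of a per-step local

    void Awake()
    {
        rb = GetComponent<Rigidbody>();   // one-time component search
    }

    void FixedUpdate()
    {
        velocity = rb.velocity;           // no GetComponent call per step
        // ... build state from velocity ...
    }
}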

unity-environment/Assets/ML-Agents/Examples/Area/Scripts/Wall/WallAgent.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
Vector3 velocity = GetComponent<Rigidbody>().velocity;
state.Add((transform.position.x - area.transform.position.x));
state.Add((transform.position.y - area.transform.position.y));

unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add(position);
return state;
}

unity-environment/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgentConfigurable.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add(body.transform.rotation.eulerAngles.x);
state.Add(body.transform.rotation.eulerAngles.y);
state.Add(body.transform.rotation.eulerAngles.z);

unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (7 changes)


int closestPitDistance = 2 * (int)academy.resetParameters["gridSize"];
GameObject currentClosestPit = academy.actorObjs[0];
GameObject agent = academy.actorObjs[0];
- List<float> state = new List<float>();
- if (actor.tag == "agent")
+ if (actor.CompareTag("agent"))
{
agent = actor;
state.Add(actor.transform.position.x / (gridSize + 1));

}
foreach (GameObject actor in academy.actorObjs)
{
- if (actor.tag == "goal")
+ if (actor.CompareTag("goal"))
{
int distance = (int)Mathf.Abs(agent.transform.position.x - actor.transform.position.x) + (int)Mathf.Abs(agent.transform.position.z - actor.transform.position.z);
if (closestGoalDistance > distance)

}
}
- if (actor.tag == "pit")
+ if (actor.CompareTag("pit"))
{
int distance = (int)Mathf.Abs(agent.transform.position.x - actor.transform.position.x) + (int)Mathf.Abs(agent.transform.position.z - actor.transform.position.z);
if (closestPitDistance > distance)
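CompareTag is the cheaper form because reading GameObject.tag copies the tag into a new managed string on every access, while CompareTag performs the comparison natively with no allocation (and logs an error for an undefined tag instead of silently returning false). A small sketch of the difference:

using UnityEngine;

// Hypothetical illustration; "agent" is assumed to be a defined tag.
public class TagComparisonExample : MonoBehaviour
{
    bool IsAgent(GameObject actor)
    {
        // actor.tag == "agent" allocates a string per call.
        // CompareTag runs the same test without the allocation:
        return actor.CompareTag("agent");
    }
}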

unity-environment/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add(pendulumA.transform.rotation.x);
state.Add(pendulumA.transform.rotation.y);
state.Add(pendulumA.transform.rotation.z);

unity-environment/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (1 change)


public override List<float> CollectState()
{
- List<float> state = new List<float>();
state.Add(invertMult * gameObject.transform.position.x);
state.Add(gameObject.transform.position.y);
state.Add(invertMult * gameObject.GetComponent<Rigidbody>().velocity.x);

unity-environment/Assets/ML-Agents/Scripts/Academy.cs (46 changes)


private ScreenConfiguration inferenceConfiguration = new ScreenConfiguration(1280, 720, 5, 1.0f, 60);
[SerializeField]
private ResetParameter[] defaultResetParameters;
* Academy. You can modify these parameters when training with an External
* brain by passing a config dictionary at reset. Reference resetParameters
* in your AcademyReset() or AcademyStep() to modify elements in your
* environment at reset time. */
public Dictionary<string, float> resetParameters;
* AcademyStep() at step time.
* If true, all agents done flags will be set to true.*/
[HideInInspector]
public bool done;
public int episodeCount;
[HideInInspector]
public int currentStep;
public Communicator communicator;
void Awake()
{

brain.SendDone();
}
brain.ResetIfDone();
}
- foreach (Brain brain in brains)
- {
- brain.SendState();
- }
+ SendState();
// Called before AcademyReset().

brain.ResetDoneAndReward();
}
}
+ // Instructs all brains to collect states from their agents.
+ private void SendState()
+ {
+ foreach (Brain brain in brains)
+ {
+ brain.SendState();
+ }
+ }
// Instructs all brains to process states to produce actions.

unity-environment/Assets/ML-Agents/Scripts/Agent.cs (64 changes)


*/
public abstract class Agent : MonoBehaviour
{
public Brain brain;
public List<Camera> observations;
public int maxStep;
public bool resetOnDone = true;
* Modify in AgentStep().
* Should be set to a positive value to reinforce desired behavior, and
* to a negative value to punish undesirable behavior.
[HideInInspector]
public float reward;
// State list for the agent.
public List<float> state;
* episode for the given agent. */
public bool done;
* agent at every step using env.Step(actions, values).
* If AgentMonitor is attached to the Agent, this value will be displayed.*/
[HideInInspector]
public float value;
/**< \brief Do not modify: This keeps track of the cumulative reward.*/
[HideInInspector]
public int stepCounter;
public float[] agentStoredAction;
public float[] memory;
[HideInInspector]
public int id;
void OnEnable()
{

*/
public virtual void InitializeAgent()
{
+ state = new List<float>(brain.brainParameters.stateSize);
}
/// Collect the states of the agent with this method

* Note : The order of the elements in the state list is important.
* @returns state A list of floats corresponding to the state of the agent.
*/
+ public List<float> ClearAndCollectState() {
+ state.Clear();
+ CollectState();
+ return state;
+ }
- List<float> state = new List<float>();
- return state;
- }
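This pair of methods is the core of the example-side changes: InitializeAgent allocates the state list once with the capacity declared in the brain parameters, and ClearAndCollectState empties and refills it each step, so the backing array is never reallocated in steady state. A condensed sketch of the lifecycle, abridged from the diff:

// One list per agent, allocated once:
public List<float> state;

public virtual void InitializeAgent()
{
    // Capacity hint avoids growth reallocations while agents Add() states.
    state = new List<float>(brain.brainParameters.stateSize);
}

public List<float> ClearAndCollectState()
{
    state.Clear();     // resets Count; keeps the backing array
    CollectState();    // subclasses append into the same list
    return state;
}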

public void SetCumulativeReward()
{
CumulativeReward += reward;
- //Debug.Log(reward);
}
/// Do not modify : Is used by the brain to collect done.

unity-environment/Assets/ML-Agents/Scripts/Brain.cs (121 changes)


continuous};
/** Only needs to be modified in the brain's inspector.
* Defines the resolution of the camera
*/

*/
public class Brain : MonoBehaviour
{
// Current agent info
public Dictionary<int, List<float>> currentStates = new Dictionary<int, List<float>>(32);
public Dictionary<int, List<Camera>> currentCameras = new Dictionary<int, List<Camera>>(32);
public Dictionary<int, float> currentRewards = new Dictionary<int, float>(32);
public Dictionary<int, bool> currentDones = new Dictionary<int, bool>(32);
public Dictionary<int, float[]> currentActions = new Dictionary<int, float[]>(32);
public Dictionary<int, float[]> currentMemories = new Dictionary<int, float[]>(32);
public BrainParameters brainParameters = new BrainParameters();
public BrainType brainType;
/**< \brief Keeps track of the agents which subscribe to this brain*/
public CoreBrain coreBrain;
// Ensures the coreBrains are not duplicated with the brains
[SerializeField]

}
public void CollectEverything() {
currentStates.Clear();
currentCameras.Clear();
currentRewards.Clear();
currentDones.Clear();
currentActions.Clear();
currentMemories.Clear();
foreach (KeyValuePair<int, Agent> idAgent in agents)
{
idAgent.Value.SetCumulativeReward();
List<float> states = idAgent.Value.ClearAndCollectState();
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
}
if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));
}
List<Camera> observations = idAgent.Value.observations;
if (observations.Count < brainParameters.cameraResolutions.Count())
{
throw new UnityAgentsException(string.Format(@"The number of observations does not match for agent {0}:
Was expecting at least {1} observation but received {2}.", idAgent.Value.gameObject.name, brainParameters.cameraResolutions.Count(), observations.Count));
}
currentStates.Add(idAgent.Key, states);
currentCameras.Add(idAgent.Key, observations);
currentRewards.Add(idAgent.Key, idAgent.Value.reward);
currentDones.Add(idAgent.Key, idAgent.Value.done);
currentActions.Add(idAgent.Key, idAgent.Value.agentStoredAction);
currentMemories.Add(idAgent.Key, idAgent.Value.memory);
}
}
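CollectEverything applies the same reuse strategy one level up: the six current* dictionaries are fields created once with a 32-entry capacity hint and emptied with Clear() each step, where the previous Collect* methods returned a freshly allocated Dictionary per call. A minimal sketch of the clear-and-refill pattern with a hypothetical payload:

using System.Collections.Generic;

// Hypothetical illustration of the Clear-and-refill pattern in CollectEverything.
class RewardCollector
{
    // Allocated once; Clear() keeps the bucket array for reuse.
    readonly Dictionary<int, float> currentRewards = new Dictionary<int, float>(32);

    public Dictionary<int, float> Collect(IReadOnlyDictionary<int, float> agentRewards)
    {
        currentRewards.Clear();
        foreach (var kv in agentRewards)
            currentRewards.Add(kv.Key, kv.Value);
        return currentRewards;   // a live reference, invalidated by the next Clear()
    }
}

The trade-off is visible in the return value: callers now share one mutable dictionary and must not hold it across steps.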
- Dictionary<int, List<float>> result = new Dictionary<int, List<float>>();
+ currentStates.Clear();
- List<float> states = idAgent.Value.CollectState();
+ List<float> states = idAgent.Value.ClearAndCollectState();
if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:

throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));
}
- result.Add(idAgent.Key, states);
+ currentStates.Add(idAgent.Key, states);
- return result;
+ return currentStates;
}
/// Collects the observations of all the agents which subscribe to this

- Dictionary<int, List<Camera>> result = new Dictionary<int, List<Camera>>();
+ currentCameras.Clear();
foreach (KeyValuePair<int, Agent> idAgent in agents)
{
List<Camera> observations = idAgent.Value.observations;

Was expecting at least {1} observation but received {2}.", idAgent.Value.gameObject.name, brainParameters.cameraResolutions.Count(), observations.Count));
}
- result.Add(idAgent.Key, observations);
+ currentCameras.Add(idAgent.Key, observations);
- return result;
+ return currentCameras;
}

{
- Dictionary<int, float> result = new Dictionary<int, float>();
+ currentRewards.Clear();
- result.Add(idAgent.Key, idAgent.Value.reward);
+ currentRewards.Add(idAgent.Key, idAgent.Value.reward);
- return result;
+ return currentRewards;
}
/// Collects the done flag of all the agents which subscribe to this brain

- Dictionary<int, bool> result = new Dictionary<int, bool>();
+ currentDones.Clear();
- result.Add(idAgent.Key, idAgent.Value.done);
+ currentDones.Add(idAgent.Key, idAgent.Value.done);
- return result;
+ return currentDones;
}
/// Collects the actions of all the agents which subscribe to this brain

- Dictionary<int, float[]> result = new Dictionary<int, float[]>();
+ currentActions.Clear();
- result.Add(idAgent.Key, idAgent.Value.agentStoredAction);
+ currentActions.Add(idAgent.Key, idAgent.Value.agentStoredAction);
- return result;
+ return currentActions;
}
/// Collects the memories of all the agents which subscribe to this brain

- Dictionary<int, float[]> result = new Dictionary<int, float[]>();
+ currentMemories.Clear();
- result.Add(idAgent.Key, idAgent.Value.memory);
+ currentMemories.Add(idAgent.Key, idAgent.Value.memory);
- return result;
+ return currentMemories;
}
/// Takes a dictionary {id -> memories} and sends the memories to the

/// which are not done
public void Step()
{
- List<Agent> agentsToIterate = agents.Values.ToList();
- foreach (Agent agent in agentsToIterate)
+ foreach (Agent agent in agents.Values)
{
if (!agent.done)
{

/// Is used by the Academy to reset the agents if they are done
public void ResetIfDone()
{
- List<Agent> agentsToIterate = agents.Values.ToList();
- foreach (Agent agent in agentsToIterate)
+ foreach (Agent agent in agents.Values)
{
if (agent.done)
{

/// Is used by the Academy to reset all agents
public void Reset()
{
- List<Agent> agentsToIterate = agents.Values.ToList();
- foreach (Agent agent in agentsToIterate)
+ foreach (Agent agent in agents.Values)
{
agent.Reset();
agent.done = false;

/// (as list of float arrays)
public List<float[,,,]> GetObservationMatrixList(List<int> agent_keys)
{
- List<float[,,,]> observation_matrix_list = new List<float[,,,]>();
+ var observation_matrix_list = new List<float[,,,]>();
- int width = brainParameters.cameraResolutions[obs_number].width;
- int height = brainParameters.cameraResolutions[obs_number].height;
- bool bw = brainParameters.cameraResolutions[obs_number].blackAndWhite;
- int pixels = 0;
+ var width = brainParameters.cameraResolutions[obs_number].width;
+ var height = brainParameters.cameraResolutions[obs_number].height;
+ var bw = brainParameters.cameraResolutions[obs_number].blackAndWhite;
+ var pixels = 0;
if (bw)
pixels = 1;
else

, width
, pixels];
- int i = 0;
+ var i = 0;
foreach (int k in agent_keys)
{
Camera agent_obs = observations[k][obs_number];

unity-environment/Assets/ML-Agents/Scripts/Communicator.cs (20 changes)


*/
public struct AcademyParameters
{
public string AcademyName;
public string apiNumber;
/**< \brief The location of the logfile*/
public Dictionary<string, float> resetParameters;
/**< \brief A list of all the brain names sent via socket*/
public List<BrainParameters> brainParameters;
public List<string> externalBrainNames;
}
public enum ExternalCommand

unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs (3 changes)


/// CoreBrain which decides actions via communication with an external system such as Python.
public class CoreBrainExternal : ScriptableObject, CoreBrain
{
/**< Reference to the brain that uses this CoreBrainExternal */
ExternalCommunicator coord;

unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs (8 changes)


[SerializeField]
private bool broadcast = true;
public Brain brain;
public Decision decision;
/// Create the reference to the brain
public void SetBrain(Brain b)

throw new UnityAgentsException("The Brain is set to Heuristic, but no decision script attached to it");
}
- Dictionary<int, float[]> actions = new Dictionary<int, float[]>();
- Dictionary<int, float[]> new_memories = new Dictionary<int, float[]>();
+ var actions = new Dictionary<int, float[]>();
+ var new_memories = new Dictionary<int, float[]>();
Dictionary<int, List<float>> states = brain.CollectStates();
Dictionary<int, List<Camera>> observations = brain.CollectObservations();
Dictionary<int, float> rewards = brain.CollectRewards();

unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (32 changes)


{
Dictionary<int, List<float>> states = brain.CollectStates();
inputState = new float[currentBatchSize, brain.brainParameters.stateSize];
- int i = 0;
+ var i = 0;
foreach (int k in agentKeys)
{
List<float> state_list = states[k];

{
Dictionary<int, float[]> old_memories = brain.CollectMemories();
inputOldMemories = new float[currentBatchSize, brain.brainParameters.memorySize];
- int i = 0;
+ var i = 0;
foreach (int k in agentKeys)
{
float[] m = old_memories[k];

{
if (brain.brainParameters.stateSpaceType == StateType.discrete)
{
- int[,] discreteInputState = new int[currentBatchSize, 1];
+ var discreteInputState = new int[currentBatchSize, 1];
for (int i = 0; i < currentBatchSize; i++)
{
discreteInputState[i, 0] = (int)inputState[i, 0];

// Create the recurrent tensor
if (hasRecurrent)
{
- Dictionary<int, float[]> new_memories = new Dictionary<int, float[]>();
+ var new_memories = new Dictionary<int, float[]>();
- int i = 0;
+ var i = 0;
- float[] m = new float[brain.brainParameters.memorySize];
+ var m = new float[brain.brainParameters.memorySize];
for (int j = 0; j < brain.brainParameters.memorySize; j++)
{
m[j] = recurrent_tensor[i, j];

brain.SendMemories(new_memories);
}
- Dictionary<int, float[]> actions = new Dictionary<int, float[]>();
+ var actions = new Dictionary<int, float[]>();
- float[,] output = networkOutput[0].GetValue() as float[,];
- int i = 0;
+ var output = networkOutput[0].GetValue() as float[,];
+ var i = 0;
- float[] a = new float[brain.brainParameters.actionSize];
+ var a = new float[brain.brainParameters.actionSize];
for (int j = 0; j < brain.brainParameters.actionSize; j++)
{
a[j] = output[i, j];

else if (brain.brainParameters.actionSpaceType == StateType.discrete)
{
long[,] output = networkOutput[0].GetValue() as long[,];
- int i = 0;
+ var i = 0;
- float[] a = new float[1] { (float)(output[i, 0]) };
+ var a = new float[1] { (float)(output[i, 0]) };
actions.Add(k, a);
i++;
}

#if ENABLE_TENSORFLOW && UNITY_EDITOR
EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
broadcast = EditorGUILayout.Toggle("Broadcast", broadcast);
- SerializedObject serializedBrain = new SerializedObject(this);
+ var serializedBrain = new SerializedObject(this);
- SerializedProperty tfGraphModel = serializedBrain.FindProperty("graphModel");
+ var tfGraphModel = serializedBrain.FindProperty("graphModel");
serializedBrain.Update();
EditorGUILayout.ObjectField(tfGraphModel);
serializedBrain.ApplyModifiedProperties();

ObservationPlaceholderName[obs_number] = "observation_" + obs_number;
}
}
- SerializedProperty opn = serializedBrain.FindProperty("ObservationPlaceholderName");
+ var opn = serializedBrain.FindProperty("ObservationPlaceholderName");
serializedBrain.Update();
EditorGUILayout.PropertyField(opn, true);
serializedBrain.ApplyModifiedProperties();

- SerializedProperty tfPlaceholders = serializedBrain.FindProperty("graphPlaceholders");
+ var tfPlaceholders = serializedBrain.FindProperty("graphPlaceholders");
serializedBrain.Update();
EditorGUILayout.PropertyField(tfPlaceholders, true);
serializedBrain.ApplyModifiedProperties();

unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs (14 changes)


{
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
- float[] action = new float[brain.brainParameters.actionSize];
+ var action = new float[brain.brainParameters.actionSize];
foreach (ContinuousPlayerAction cha in continuousPlayerActions)
{
if (Input.GetKey(cha.key))

}
- Dictionary<int, float[]> actions = new Dictionary<int, float[]>();
+ var actions = new Dictionary<int, float[]>();
foreach (KeyValuePair<int, Agent> idAgent in brain.agents)
{
actions.Add(idAgent.Key, action);

else
{
- float[] action = new float[1] { defaultAction };
+ var action = new float[1] { defaultAction };
foreach (DiscretePlayerAction dha in discretePlayerActions)
{
if (Input.GetKey(dha.key))

}
}
- Dictionary<int, float[]> actions = new Dictionary<int, float[]>();
+ var actions = new Dictionary<int, float[]>();
foreach (KeyValuePair<int, Agent> idAgent in brain.agents)
{
actions.Add(idAgent.Key, action);

#if UNITY_EDITOR
EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
broadcast = EditorGUILayout.Toggle("Broadcast", broadcast);
- SerializedObject serializedBrain = new SerializedObject(this);
+ var serializedBrain = new SerializedObject(this);
- SerializedProperty chas = serializedBrain.FindProperty("continuousPlayerActions");
+ var chas = serializedBrain.FindProperty("continuousPlayerActions");
serializedBrain.Update();
EditorGUILayout.PropertyField(chas, true);
serializedBrain.ApplyModifiedProperties();

{
GUILayout.Label("Edit the discrete inputs for your actions", EditorStyles.boldLabel);
defaultAction = EditorGUILayout.IntField("Default Action", defaultAction);
- SerializedProperty dhas = serializedBrain.FindProperty("discretePlayerActions");
+ var dhas = serializedBrain.FindProperty("discretePlayerActions");
serializedBrain.Update();
EditorGUILayout.PropertyField(dhas, true);
serializedBrain.ApplyModifiedProperties();

unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (159 changes)


Dictionary<string, Dictionary<int, float[]>> storedMemories;
Dictionary<string, Dictionary<int, float>> storedValues;
+ // For Messages
+ List<float> concatenatedStates = new List<float>(1024);
+ List<float> concatenatedRewards = new List<float>(32);
+ List<float> concatenatedMemories = new List<float>(1024);
+ List<bool> concatenatedDones = new List<bool>(32);
+ List<float> concatenatedActions = new List<float>(1024);
private int comPort;
Socket sender;
byte[] messageHolder;

const string api = "API-2";
- private class StepMessage
+ /// Placeholder for state information to send.
+ [System.Serializable]
+ public struct StepMessage
- public string brain_name { get; set; }
- public List<int> agents { get; set; }
- public List<float> states { get; set; }
- public List<float> rewards { get; set; }
- public List<float> actions { get; set; }
- public List<float> memories { get; set; }
- public List<bool> dones { get; set; }
+ public string brain_name;
+ public List<int> agents;
+ public List<float> states;
+ public List<float> rewards;
+ public List<float> actions;
+ public List<float> memories;
+ public List<bool> dones;
}
+ StepMessage sMessage;
+ string sMessageString;
+ string rMessage;
- private class AgentMessage
+ /// Placeholder for returned message.
+ struct AgentMessage
- private class ResetParametersMessage
+ /// Placeholder for reset parameter message
+ struct ResetParametersMessage
public bool train_model { get; set; }
}

hasSentState[brain.gameObject.name] = false;
}
- public bool CommunicatorHandShake(){
+ /// Attempts to make handshake with external API.
+ public bool CommunicatorHandShake()
+ {
try
{
ReadArgs();

sender = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
sender.Connect("localhost", comPort);
- AcademyParameters accParamerters = new AcademyParameters();
+ var accParamerters = new AcademyParameters();
accParamerters.brainParameters = new List<BrainParameters>();
accParamerters.brainNames = new List<string>();
accParamerters.externalBrainNames = new List<string>();

accParamerters.resetParameters = academy.resetParameters;
SendParameters(accParamerters);
+ sMessage = new StepMessage();
void HandleLog(string logString, string stackTrace, LogType type)
{
}
}
/// Listens to the socket for a command and returns the corresponding
/// External Command.

public Dictionary<string, float> GetResetParameters()
{
sender.Send(Encoding.ASCII.GetBytes("CONFIG_REQUEST"));
- ResetParametersMessage resetParams = JsonConvert.DeserializeObject<ResetParametersMessage>(Receive());
+ Receive();
+ var resetParams = JsonConvert.DeserializeObject<ResetParametersMessage>(rMessage);
academy.isInference = !resetParams.train_model;
return resetParams.parameters;
}

private void ReadArgs()
{
string[] args = System.Environment.GetCommandLineArgs();
- string inputPort = "";
+ var inputPort = "";
for (int i = 0; i < args.Length; i++)
{
if (args[i] == "--port")

}
/// Receives messages from external agent
- private string Receive()
+ private void Receive()
- string message = Encoding.ASCII.GetString(messageHolder, 0, location);
- return message;
+ rMessage = Encoding.ASCII.GetString(messageHolder, 0, location);
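Receive now decodes into the rMessage field rather than returning a fresh string from every call site, and it reads into the pre-allocated messageHolder buffer. A sketch of the receive path under those assumptions (buffer size and socket wiring are illustrative, not from the diff):

using System.Net.Sockets;
using System.Text;

// Hypothetical condensed receive path: one reusable byte buffer,
// one field for the decoded text.
class ReceiveExample
{
    Socket sender;                                  // assumed connected elsewhere
    readonly byte[] messageHolder = new byte[8192]; // size is an assumption
    string rMessage;

    void Receive()
    {
        int location = sender.Receive(messageHolder);                     // fills the reused buffer
        rMessage = Encoding.ASCII.GetString(messageHolder, 0, location);  // only this allocates
    }
}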
/// Ends connection and closes environment
private void OnApplicationQuit()

return bytes;
}
- private byte[] AppendLength(byte[] input){
+ private byte[] AppendLength(byte[] input)
+ {
byte[] newArray = new byte[input.Length + 4];
input.CopyTo(newArray, 4);
System.BitConverter.GetBytes(input.Length).CopyTo(newArray, 0);
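AppendLength implements length-prefix framing: a 4-byte payload length (BitConverter byte order, little-endian on typical platforms) followed by the payload, so the receiving side can read exactly one message per frame. A sketch of the matching decode, written in C# for illustration (the framing is from the diff; this reader is an assumption):

// Hypothetical inverse of AppendLength: recover the payload from a framed buffer.
static byte[] StripLength(byte[] framed)
{
    int length = System.BitConverter.ToInt32(framed, 0); // bytes 0..3 hold the payload length
    var payload = new byte[length];
    System.Array.Copy(framed, 4, payload, 0, length);    // skip the 4-byte header
    return payload;
}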

/// Collects the information from the brains and sends it across the socket
public void giveBrainInfo(Brain brain)
{
- string brainName = brain.gameObject.name;
+ var brainName = brain.gameObject.name;
- List<float> concatenatedStates = new List<float>();
- List<float> concatenatedRewards = new List<float>();
- List<float> concatenatedMemories = new List<float>();
- List<bool> concatenatedDones = new List<bool>();
- List<float> concatenatedActions = new List<float>();
- Dictionary<int, List<Camera>> collectedObservations = brain.CollectObservations();
- Dictionary<int, List<float>> collectedStates = brain.CollectStates();
- Dictionary<int, float> collectedRewards = brain.CollectRewards();
- Dictionary<int, float[]> collectedMemories = brain.CollectMemories();
- Dictionary<int, bool> collectedDones = brain.CollectDones();
- Dictionary<int, float[]> collectedActions = brain.CollectActions();
+ brain.CollectEverything();
+ concatenatedStates.Clear();
+ concatenatedRewards.Clear();
+ concatenatedMemories.Clear();
+ concatenatedDones.Clear();
+ concatenatedActions.Clear();
- concatenatedStates = concatenatedStates.Concat(collectedStates[id]).ToList();
- concatenatedRewards.Add(collectedRewards[id]);
- concatenatedMemories = concatenatedMemories.Concat(collectedMemories[id].ToList()).ToList();
- concatenatedDones.Add(collectedDones[id]);
- concatenatedActions = concatenatedActions.Concat(collectedActions[id].ToList()).ToList();
+ concatenatedStates.AddRange(brain.currentStates[id]);
+ concatenatedRewards.Add(brain.currentRewards[id]);
+ concatenatedMemories.AddRange(brain.currentMemories[id].ToList());
+ concatenatedDones.Add(brain.currentDones[id]);
+ concatenatedActions.AddRange(brain.currentActions[id].ToList());
- StepMessage message = new StepMessage()
- {
- brain_name = brainName,
- agents = current_agents[brainName],
- states = concatenatedStates,
- rewards = concatenatedRewards,
- actions = concatenatedActions,
- memories = concatenatedMemories,
- dones = concatenatedDones
- };
- string envMessage = JsonConvert.SerializeObject(message, Formatting.Indented);
- sender.Send(AppendLength(Encoding.ASCII.GetBytes(envMessage)));
+ sMessage.brain_name = brainName;
+ sMessage.agents = current_agents[brainName];
+ sMessage.states = concatenatedStates;
+ sMessage.rewards = concatenatedRewards;
+ sMessage.actions = concatenatedActions;
+ sMessage.memories = concatenatedMemories;
+ sMessage.dones = concatenatedDones;
+ sMessageString = JsonUtility.ToJson(sMessage);
+ sender.Send(AppendLength(Encoding.ASCII.GetBytes(sMessageString)));
Receive();
int i = 0;
foreach (resolution res in brain.brainParameters.cameraResolutions)

- sender.Send(AppendLength(TexToByteArray(brain.ObservationToTex(collectedObservations[id][i], res.width, res.height))));
+ sender.Send(AppendLength(TexToByteArray(brain.ObservationToTex(brain.currentCameras[id][i], res.width, res.height))));
Receive();
}
i++;
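Two send-path allocations disappear in the hunk above: the per-call StepMessage instance (replaced by the reused sMessage field) and Json.NET's JsonConvert.SerializeObject with Formatting.Indented (replaced by Unity's built-in JsonUtility, matching the "Use built-in json tool" commit). JsonUtility only serializes public fields on [System.Serializable] types, which is why StepMessage's { get; set; } properties became plain fields. A minimal sketch of that contract with a hypothetical message type:

using UnityEngine;

[System.Serializable]
struct PingMessage              // hypothetical example type
{
    public string brain_name;   // public field: serialized
    public int step;            // auto-properties would be skipped by JsonUtility
}

class JsonUtilityExample
{
    static void Demo()
    {
        var msg = new PingMessage { brain_name = "SomeBrain", step = 7 };
        string json = JsonUtility.ToJson(msg);  // → {"brain_name":"SomeBrain","step":7}
        Debug.Log(json);
    }
}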

{
// TO MODIFY --------------------------------------------
sender.Send(Encoding.ASCII.GetBytes("STEPPING"));
- string a = Receive();
- AgentMessage agentMessage = JsonConvert.DeserializeObject<AgentMessage>(a);
+ Receive();
+ var agentMessage = JsonConvert.DeserializeObject<AgentMessage>(rMessage);
- string brainName = brain.gameObject.name;
+ var brainName = brain.gameObject.name;
- Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
+ var actionDict = new Dictionary<int, float[]>();
+ var memoryDict = new Dictionary<int, float[]>();
+ var valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
if (brain.brainParameters.actionSpaceType == StateType.continuous)

actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
}
}
storedActions[brainName] = actionDict;
- Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
- for (int i = 0; i < current_agents[brainName].Count; i++)
- {
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
+ valueDict.Add(current_agents[brainName][i],
+ agentMessage.value[brainName][i]);
- Dictionary<int, float> valueDict = new Dictionary<int, float>();
- for (int i = 0; i < current_agents[brainName].Count; i++)
- {
- valueDict.Add(current_agents[brainName][i],
- agentMessage.value[brainName][i]);
- }
}
}

unity-environment/Assets/ML-Agents/Scripts/Monitor.cs (19 changes)


* @param value The value you want to display.
* @param displayType The type of display.
* @param target The transform you want to attach the information to.
*/
string key,
object value,
MonitorType displayType = MonitorType.text,
Transform target = null)
{

}
if (!displayValues.ContainsKey(key))
{
- DisplayValue dv = new DisplayValue();
+ var dv = new DisplayValue();
dv.time = Time.timeSinceLevelLoad;
dv.value = value;
dv.monitorDisplayType = displayType;

float paddingwidth = 10 * widthScaler;
float scale = 1f;
- Vector2 origin = new Vector3(0, Screen.height);
+ var origin = new Vector3(0, Screen.height);
if (!(target == canvas.transform))
{
Vector3 cam2obj = target.position - Camera.main.transform.position;

valueStyle = GUI.skin.label;
valueStyle.clipping = TextClipping.Overflow;
valueStyle.wordWrap = false;
- Texture2D texture = new Texture2D(1, 1, TextureFormat.ARGB32, false);
+ var texture = new Texture2D(1, 1, TextureFormat.ARGB32, false);
- GUIStyle staticRectStyle = new GUIStyle();
+ var staticRectStyle = new GUIStyle();
staticRectStyle.normal.background = texture;
colorStyle[i] = staticRectStyle;
}
