浏览代码

added broadcast to the player and heuristic brain.

Allows the python API to record actions taken along with the states and rewards
/develop-generalizationTraining-TrainerController
vincentpierre 7 年前
当前提交
e36b8bf0
共有 7 个文件被更改,包括 134 次插入44 次删除
  1. 3
      python/unityagents/brain.py
  2. 28
      python/unityagents/environment.py
  3. 3
      unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs
  4. 4
      unity-environment/Assets/ML-Agents/Scripts/Communicator.cs
  5. 29
      unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs
  6. 28
      unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs
  7. 83
      unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs

3
python/unityagents/brain.py


class BrainInfo:
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None):
def __init__(self, observation, state, memory=None, reward=None, agents=None, local_done=None, action =None):
"""
Describes experience at current step of all agents linked to a brain.
"""

self.rewards = reward
self.local_done = local_done
self.agents = agents
self.actions = action
class BrainParameters:

28
python/unityagents/environment.py


self._data = {}
self._global_done = None
self._academy_name = p["AcademyName"]
self._num_brains = len(p["brainParameters"])
self._external_brain_names = p["externalBrainNames"]
self._external_brain_names = [] if self._external_brain_names is None else self._external_brain_names
self._num_brains = len(self._brain_names)
self._num_external_brains = len(self._external_brain_names)
print(p["brainParameters"][i])
self._conn.send(b".")
self._loaded = True
logger.info("\n'{}' started successfully!".format(self._academy_name))

return self._num_brains
# Count of external brains, cached as len(self._external_brain_names).
@property
def number_external_brains(self):
return self._num_external_brains
@property
# Names of the external brains, read from p["externalBrainNames"]; this is an
# empty list when that entry was None.
@property
def external_brain_names(self):
return self._external_brain_names
@staticmethod
def _process_pixels(image_bytes=None, bw=False):

rewards = state_dict["rewards"]
dones = state_dict["dones"]
agents = state_dict["agents"]
actions = state_dict["actions"]
observations = []
for o in range(self._brains[b].number_observations):

observations.append(np.array(obs_n))
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones)
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones, actions)
self._global_done = self._conn.recv(self._buffer_size).decode('utf-8') == 'True'

arr = [float(x) for x in arr]
return arr
def step(self, action, memory=None, value=None):
def step(self, action = None, memory=None, value=None):
"""
Provides the environment with an action, moves the environment dynamics forward accordingly, and returns
observation, state, and reward information to the agent.

:return: A Data structure corresponding to the new state of the environment.
"""
action = {} if action is None else action
if self._num_brains > 1:
if self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names a keys, "
"and actions as values".format(self._num_brains))

if self._num_brains > 1:
if self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and memories as values".format(self._num_brains))

if self._num_brains > 1:
if self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and state/action value estimates as values".format(self._num_brains))

for b in self._brain_names:
for b in self._external_brain_names:
n_agent = len(self._data[b].agents)
if b not in action:
raise UnityActionException("You need to input an action for the brain {0}".format(b))

3
unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DDecision.cs


{
if (gameObject.GetComponent<Brain>().brainParameters.actionSpaceType == StateType.continuous)
{
return new float[4]{ 0f, 0f, 0f, 0.0f };
return new float[2]{ -10*(state[4]-state[2]), -10*(state[2]+state[4])};
}
else

4
unity-environment/Assets/ML-Agents/Scripts/Communicator.cs


public Dictionary<string, float> resetParameters;
/**< \brief The default reset parameters are sent via socket*/
public List<string> brainNames;
/**< \brief A list of the External brains names sent via socket*/
/**< \brief A list of all the brain names sent via socket*/
public List<string> externalBrainNames;
/**< \brief A list of the External brain names sent via socket*/
}
public enum ExternalCommand

29
unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs


public Brain brain;
/**< Reference to the brain that uses this CoreBrainHeuristic */
public bool broadcast;
/**< If true, the brain will send states / actions / rewards through the communicator */
ExternalCommunicator coord;
public Decision decision;
/**< Reference to the Decision component used to decide the actions */

/// Caches the Decision component attached to the brain's GameObject and,
/// when broadcast is enabled, subscribes the brain to the Academy's
/// ExternalCommunicator (creating and registering one if the Academy has
/// no communicator yet).
public void InitializeCoreBrain()
{
    decision = brain.gameObject.GetComponent<Decision>();
    if (!broadcast)
    {
        return;
    }

    // Look the Academy up once on the brain's parent GameObject instead of
    // repeating the same GetComponent chain for every access.
    Academy academy = brain.gameObject.transform.parent.gameObject.GetComponent<Academy>();
    if (academy.communicator == null)
    {
        coord = new ExternalCommunicator(academy);
        academy.communicator = coord;
        coord.SubscribeBrain(brain);
        return;
    }

    // A communicator already exists: only reuse it when it is an
    // ExternalCommunicator; otherwise coord is left untouched.
    ExternalCommunicator existing = academy.communicator as ExternalCommunicator;
    if (existing != null)
    {
        coord = existing;
        coord.SubscribeBrain(brain);
    }
}
/// Uses the Decision Component to decide the action to take

/// Sends the brain's info through the communicator when broadcast is enabled.
/// Nothing else needs to be implemented, the states are collected in DecideAction
public void SendState()
{
    // coord is only assigned in InitializeCoreBrain, and only when broadcast
    // was enabled and the Academy's communicator is an ExternalCommunicator.
    // Skip the call when it is missing rather than throwing a
    // NullReferenceException.
    if (broadcast && coord != null)
    {
        coord.giveBrainInfo(brain);
    }
}
/// Displays an error if no decision component is attached to the brain

EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
broadcast = EditorGUILayout.Toggle("Broadcast", broadcast);
if (brain.gameObject.GetComponent<Decision>() == null)
{
EditorGUILayout.HelpBox("You need to add a 'Decision' component to this gameObject", MessageType.Error);

28
unity-environment/Assets/ML-Agents/Scripts/CoreBrainPlayer.cs


public float value;
}
public bool broadcast;
/**< If true, the brain will send states / actions / rewards through the communicator */
ExternalCommunicator coord;
[SerializeField]
/// Contains the mapping from input to continuous actions
private ContinuousPlayerAction[] continuousPlayerActions;

/// When broadcast is enabled, subscribes the brain to the Academy's
/// ExternalCommunicator (creating and registering one if the Academy has
/// no communicator yet). Does nothing when broadcast is disabled.
public void InitializeCoreBrain()
{
    if (!broadcast)
    {
        return;
    }

    // Look the Academy up once on the brain's parent GameObject instead of
    // repeating the same GetComponent chain for every access.
    Academy academy = brain.gameObject.transform.parent.gameObject.GetComponent<Academy>();
    if (academy.communicator == null)
    {
        coord = new ExternalCommunicator(academy);
        academy.communicator = coord;
        coord.SubscribeBrain(brain);
        return;
    }

    // A communicator already exists: only reuse it when it is an
    // ExternalCommunicator; otherwise coord is left untouched.
    ExternalCommunicator existing = academy.communicator as ExternalCommunicator;
    if (existing != null)
    {
        coord = existing;
        coord.SubscribeBrain(brain);
    }
}
/// Uses the continuous inputs or discrete inputs of the player to

/// decisions
public void SendState()
{
    // When broadcast is enabled, forward the brain's info through the
    // communicator. coord is only assigned in InitializeCoreBrain, and only
    // when broadcast was enabled and the Academy's communicator is an
    // ExternalCommunicator. Skip the call when it is missing rather than
    // throwing a NullReferenceException.
    if (broadcast && coord != null)
    {
        coord.giveBrainInfo(brain);
    }
}
/// Displays continuous or discrete input mapping in the inspector

EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
SerializedObject serializedBrain = new SerializedObject(this);
broadcast = EditorGUILayout.Toggle("Broadcast", broadcast);
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
GUILayout.Label("Edit the continuous inputs for you actions", EditorStyles.boldLabel);

83
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


private class StepMessage
{
public string brain_name { get; set; }
public List<bool> dones { get; set; }
}

public Dictionary<string, List<float>> value { get; set; }
}

public Dictionary<string, float> parameters { get; set; }
public bool train_model { get; set; }
}

}
catch
{
throw new UnityAgentsException("One of the brains was set isExternal" +
" but Unity was unable to read the" +
" arguments passed at launch");
throw new UnityAgentsException("One of the brains was set to " +
"External mode or broadcast" +
" but Unity was unable to read the" +
" arguments passed at launch");
}
messageHolder = new byte[messageLength];

AcademyParameters accParamerters = new AcademyParameters();
accParamerters.brainParameters = new List<BrainParameters>();
accParamerters.brainNames = new List<string>();
accParamerters.externalBrainNames = new List<string>();
if (b.brainType == BrainType.External)
{
accParamerters.externalBrainNames.Add(b.gameObject.name);
}
}
accParamerters.AcademyName = academy.gameObject.name;
accParamerters.resetParameters = academy.resetParameters;

}
/// Sends Academy parameters to external agent
private void SendParameters(AcademyParameters envParams)
private void SendParameters(AcademyParameters envParams)
{
string envMessage = JsonConvert.SerializeObject(envParams, Formatting.Indented);
sender.Send(Encoding.ASCII.GetBytes(envMessage));

List<float> concatenatedRewards = new List<float>();
List<float> concatenatedMemories = new List<float>();
List<bool> concatenatedDones = new List<bool>();
List<float> concatenatedActions = new List<float>();
Dictionary<int, float[]> collectedActions = brain.CollectActions();
foreach (int id in current_agents[brainName])
{

concatenatedDones.Add(collectedDones[id]);
concatenatedActions = concatenatedActions.Concat(collectedActions[id].ToList()).ToList();
}
StepMessage message = new StepMessage()
{

rewards = concatenatedRewards,
//actions = actionDict,
actions = concatenatedActions,
memories = concatenatedMemories,
dones = concatenatedDones
};

foreach (Brain brain in brains)
{
string brainName = brain.gameObject.name;
if (brain.brainType == BrainType.External)
{
string brainName = brain.gameObject.name;
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
if (brain.brainParameters.actionSpaceType == StateType.continuous)
Dictionary<int, float[]> actionDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
if (brain.brainParameters.actionSpaceType == StateType.continuous)
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i * brain.brainParameters.actionSize, brain.brainParameters.actionSize).ToArray());
}
else
{
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
}
else
storedActions[brainName] = actionDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
actionDict.Add(current_agents[brainName][i],
agentMessage.action[brainName].GetRange(i, 1).ToArray());
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
}
storedActions[brainName] = actionDict;
storedMemories[brainName] = memoryDict;
Dictionary<int, float[]> memoryDict = new Dictionary<int, float[]>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
memoryDict.Add(current_agents[brainName][i],
agentMessage.memory[brainName].GetRange(i * brain.brainParameters.memorySize, brain.brainParameters.memorySize).ToArray());
}
storedMemories[brainName] = memoryDict;
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
Dictionary<int, float> valueDict = new Dictionary<int, float>();
for (int i = 0; i < current_agents[brainName].Count; i++)
{
valueDict.Add(current_agents[brainName][i],
agentMessage.value[brainName][i]);
}
storedValues[brainName] = valueDict;
storedValues[brainName] = valueDict;
}
}

正在加载...
取消
保存