
Remove discrete observations

/develop-generalizationTraining-TrainerController
Arthur Juliani, 7 years ago
Current commit 5e48766d
21 files changed, with 602 insertions and 723 deletions
  1. python/unityagents/brain.py (33 lines changed)
  2. python/unityagents/communicator.py (3 lines changed)
  3. python/unityagents/environment.py (3 lines changed)
  4. python/unitytrainers/bc/trainer.py (8 lines changed)
  5. python/unitytrainers/models.py (60 lines changed)
  6. python/unitytrainers/ppo/models.py (38 lines changed)
  7. python/unitytrainers/ppo/trainer.py (25 lines changed)
  8. unity-environment/Assets/ML-Agents/Editor/BrainEditor.cs (10 lines changed)
  9. unity-environment/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (171 lines changed)
  10. unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (28 lines changed)
  11. unity-environment/Assets/ML-Agents/Examples/Basic/TFModels/Basic.bytes (165 lines changed)
  12. unity-environment/Assets/ML-Agents/Examples/Basic/TFModels/Basic.bytes.meta (2 lines changed)
  13. unity-environment/Assets/ML-Agents/Scripts/Agent.cs (78 lines changed)
  14. unity-environment/Assets/ML-Agents/Scripts/Batcher.cs (2 lines changed)
  15. unity-environment/Assets/ML-Agents/Scripts/Brain.cs (22 lines changed)
  16. unity-environment/Assets/ML-Agents/Scripts/CoreBrain.cs (2 lines changed)
  17. unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs (13 lines changed)
  18. unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs (9 lines changed)
  19. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (649 lines changed)
  20. unity-environment/Assets/ML-Agents/Scripts/RpcCommunicator.cs (2 lines changed)
  21. unity-environment/Assets/ML-Agents/Scripts/UnityAgentsException.cs (2 lines changed)

33
python/unityagents/brain.py


class BrainInfo:
def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
reward=None, agents=None, local_done=None,
vector_action=None, text_action=None, max_reached=None):
reward=None, agents=None, local_done=None,
vector_action=None, text_action=None, max_reached=None):
"""
Describes experience at current step of all agents linked to a brain.
"""

self.vector_action_space_size = brain_param["vectorActionSize"]
self.vector_action_descriptions = brain_param["vectorActionDescriptions"]
self.vector_action_space_type = ["discrete", "continuous"][brain_param["vectorActionSpaceType"]]
self.vector_observation_space_type = ["discrete", "continuous"][brain_param["vectorObservationSpaceType"]]
return '''Unity brain name: {0}
Number of Visual Observations (per agent): {1}
Vector Observation space type: {2}
Vector Observation space size (per agent): {3}
Number of stacked Vector Observation: {4}
Vector Action space type: {5}
Vector Action space size (per agent): {6}
Vector Action descriptions: {7}'''.format(self.brain_name,
str(self.number_visual_observations),
self.vector_observation_space_type,
str(self.vector_observation_space_size),
str(self.num_stacked_vector_observations),
self.vector_action_space_type,
str(self.vector_action_space_size),
', '.join(self.vector_action_descriptions))
return '''Unity brain name: {}
Number of Visual Observations (per agent): {}
Vector Observation space size (per agent): {}
Number of stacked Vector Observation: {}
Vector Action space type: {}
Vector Action space size (per agent): {}
Vector Action descriptions: {}'''.format(self.brain_name,
str(self.number_visual_observations),
str(self.vector_observation_space_size),
str(self.num_stacked_vector_observations),
self.vector_action_space_type,
str(self.vector_action_space_size),
', '.join(self.vector_action_descriptions))
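Note: with the discrete observation type gone, BrainParameters no longer exposes vector_observation_space_type, and the summary string above drops that line. A minimal sketch of how downstream code might describe a brain after this change (field names are taken from this diff; the helper itself is hypothetical):

def summarize_brain(params):
    # params is a BrainParameters instance as built in brain.py; every vector
    # observation is now treated as continuous, so only size and stacking matter.
    return ("Brain {}: {} visual obs, vector obs {} x {} stacked, "
            "{} actions of size {}".format(
                params.brain_name,
                params.number_visual_observations,
                params.vector_observation_space_size,
                params.num_stacked_vector_observations,
                params.vector_action_space_type,
                params.vector_action_space_size))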

3
python/unityagents/communicator.py


class Communicator(object):
def __init__(self, worker_id=0,
base_port=5005):
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the communication. Must be used in pair with the right Unity Communicator equivalent.

3
python/unityagents/environment.py


"cameraResolutions": resolution,
"vectorActionSize": brain_param.vector_action_size,
"vectorActionDescriptions": brain_param.vector_action_descriptions,
"vectorActionSpaceType": brain_param.vector_action_space_type,
"vectorObservationSpaceType": brain_param.vector_observation_space_type
"vectorActionSpaceType": brain_param.vector_action_space_type
})
if brain_param.brain_type == 2:
self._external_brain_names += [brain_param.brain_name]

8
python/unitytrainers/bc/trainer.py


self.training_buffer = Buffer()
self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
if self.use_observations:
logger.info('Cannot use observations with imitation learning')

feed_dict[self.model.true_action] = batch_actions.reshape([-1, self.brain.vector_action_space_size])
else:
feed_dict[self.model.true_action] = batch_actions.reshape([-1])
if not self.is_continuous_observation:
feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.num_stacked_vector_observations])
else:
feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
self.brain.num_stacked_vector_observations])
feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
self.brain.num_stacked_vector_observations])
if self.use_observations:
for i, _ in enumerate(self.model.visual_in):
_obs = np.array(_buffer['visual_observations%d' % i][start:end])
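Note: because observations are always continuous now, the behavioral-cloning trainer reshapes batched states to a single (batch, size * num_stacked) matrix instead of branching on the observation type. A small numpy sketch of that flattening, with illustrative sizes that are not taken from the diff:

import numpy as np

obs_size, num_stacked, batch = 20, 3, 32           # illustrative values
batch_states = np.random.rand(batch, num_stacked, obs_size)
flat = batch_states.reshape([-1, obs_size * num_stacked])
print(flat.shape)                                   # (32, 60)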

60
python/unitytrainers/models.py


:param o_size: Size of stacked vector observation.
:return:
"""
if self.brain.vector_observation_space_type == "continuous":
self.vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32, name=name)
if self.normalize:
self.running_mean = tf.get_variable("running_mean", [self.o_size], trainable=False, dtype=tf.float32,
initializer=tf.zeros_initializer())
self.running_variance = tf.get_variable("running_variance", [self.o_size], trainable=False,
dtype=tf.float32, initializer=tf.ones_initializer())
self.update_mean, self.update_variance = self.create_normalizer_update(self.vector_in)
self.vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32, name=name)
if self.normalize:
self.running_mean = tf.get_variable("running_mean", [self.o_size], trainable=False, dtype=tf.float32,
initializer=tf.zeros_initializer())
self.running_variance = tf.get_variable("running_variance", [self.o_size], trainable=False,
dtype=tf.float32, initializer=tf.ones_initializer())
self.update_mean, self.update_variance = self.create_normalizer_update(self.vector_in)
self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)), -5, 5,
name="normalized_state")
return self.normalized_state
else:
return self.vector_in
self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
self.running_variance / (tf.cast(self.global_step, tf.float32) + 1)), -5, 5,
name="normalized_state")
return self.normalized_state
self.vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='vector_observation')
return self.vector_in
def create_normalizer_update(self, vector_input):

num_layers, scope, reuse)
return hidden_flat
@staticmethod
def create_discrete_observation_encoder(observation_input, s_size, h_size, activation,
num_layers, scope, reuse):
"""
Builds a set of hidden state encoders from discrete state input.
:param reuse: Whether to re-use the weights within the same scope.
:param scope: The scope of the graph within which to create the ops.
:param observation_input: Discrete observation.
:param s_size: state input size (discrete).
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:return: List of hidden layer tensors.
"""
with tf.variable_scope(scope):
vector_in = tf.reshape(observation_input, [-1])
state_onehot = tf.one_hot(vector_in, s_size)
hidden = state_onehot
for i in range(num_layers):
hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation,
reuse=reuse, name="hidden_{}".format(i))
return hidden
def create_observation_streams(self, num_streams, h_size, num_layers):
"""
Creates encoding stream for observations.

visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if brain.vector_observation_space_size > 0:
if brain.vector_observation_space_type == "continuous":
hidden_state = self.create_continuous_observation_encoder(vector_observation_input,
h_size, activation_fn, num_layers,
"main_graph_{}".format(i), False)
else:
hidden_state = self.create_discrete_observation_encoder(vector_observation_input, self.o_size,
h_size, activation_fn, num_layers,
"main_graph_{}".format(i), False)
hidden_state = self.create_continuous_observation_encoder(vector_observation_input,
h_size, activation_fn, num_layers,
"main_graph_{}".format(i), False)
if hidden_state is not None and hidden_visual is not None:
final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
elif hidden_state is None and hidden_visual is not None:
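Note: the remaining continuous path keeps the running normalization above, which centers observations with a running mean, scales them by a running variance estimate, and clips to [-5, 5]. A numpy sketch of the same transform (the function name is mine; the formula mirrors the normalized_state op in models.py):

import numpy as np

def normalize_obs(x, running_mean, running_variance, global_step):
    # clip((x - mean) / sqrt(var / (step + 1)), -5, 5), as in normalized_state
    std = np.sqrt(running_variance / (global_step + 1.0))
    return np.clip((x - running_mean) / std, -5.0, 5.0)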

38
python/unitytrainers/ppo/models.py


encoded_next_state_list.append(hidden_next_visual)
if self.o_size > 0:
if self.brain.vector_observation_space_type == "continuous":
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
name='next_vector_observation')
encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
self.curiosity_enc_size,
self.swish, 2,
"vector_obs_encoder",
True)
else:
self.next_vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32,
name='next_vector_observation')
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
name='next_vector_observation')
encoded_vector_obs = self.create_discrete_observation_encoder(self.vector_in, self.o_size,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_discrete_observation_encoder(self.next_vector_in, self.o_size,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
True)
encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
self.curiosity_enc_size,
self.swish, 2,
"vector_obs_encoder",
True)
encoded_state_list.append(encoded_vector_obs)
encoded_next_state_list.append(encoded_next_vector_obs)
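Note: the curiosity module now embeds the current and the next vector observation with the same continuous encoder; the second call passes reuse=True so both runs share one set of "vector_obs_encoder" weights. A minimal TF1-style sketch of that sharing pattern (the sizes and the encode helper are illustrative, not the trainer's real code):

import tensorflow as tf

def encode(x, reuse):
    with tf.variable_scope("vector_obs_encoder", reuse=reuse):
        return tf.layers.dense(x, 128, activation=tf.nn.elu, name="hidden_0")

obs = tf.placeholder(tf.float32, [None, 8], name="vector_observation")
next_obs = tf.placeholder(tf.float32, [None, 8], name="next_vector_observation")
encoded = encode(obs, reuse=False)
encoded_next = encode(next_obs, reuse=True)    # reuses the weights created above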

25
python/unitytrainers/ppo/trainer.py


self.cumulative_rewards = {}
self.episode_steps = {}
self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
self.use_visual_obs = (env.brains[brain_name].number_visual_observations > 0)
self.use_vector_obs = (env.brains[brain_name].vector_observation_space_size > 0)
self.summary_path = trainer_parameters['summary_path']

self.inference_run_list.append(self.model.output_pre)
if self.use_recurrent:
self.inference_run_list.extend([self.model.memory_out])
if (self.is_training and self.is_continuous_observation and
self.use_vector_obs and self.trainer_parameters['normalize']):
if self.is_training and self.use_vector_obs and self.trainer_parameters['normalize']:
self.inference_run_list.extend([self.model.update_mean, self.model.update_variance])
def __str__(self):

if self.use_recurrent:
feed_dict[self.model.prev_action] = np.array(buffer['prev_action'][start:end]).flatten()
if self.use_vector_obs:
if self.is_continuous_observation:
total_observation_length = self.brain.vector_observation_space_size * \
self.brain.num_stacked_vector_observations
feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape(
[-1, total_observation_length])
if self.use_curiosity:
feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \
.reshape([-1, total_observation_length])
else:
feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape(
[-1, self.brain.num_stacked_vector_observations])
if self.use_curiosity:
feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \
.reshape([-1, self.brain.num_stacked_vector_observations])
total_observation_length = self.brain.vector_observation_space_size * \
self.brain.num_stacked_vector_observations
feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape(
[-1, total_observation_length])
if self.use_curiosity:
feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \
.reshape([-1, total_observation_length])
if self.use_visual_obs:
for i, _ in enumerate(self.model.visual_in):
_obs = np.array(buffer['visual_obs%d' % i][start:end])

10
unity-environment/Assets/ML-Agents/Editor/BrainEditor.cs


EditorGUILayout.LabelField("Vector Observation");
EditorGUI.indentLevel++;
SerializedProperty bpVectorObsType =
serializedBrain.FindProperty("brainParameters.vectorObservationSpaceType");
EditorGUILayout.PropertyField(bpVectorObsType, new GUIContent("Space Type",
"Corresponds to whether state " +
"vector contains a single integer (Discrete) " +
"or a series of real-valued floats (Continuous)."));
SerializedProperty bpVectorObsSize =
serializedBrain.FindProperty("brainParameters.vectorObservationSize");
EditorGUILayout.PropertyField(bpVectorObsSize, new GUIContent("Space Size",

EditorGUI.indentLevel = indentLevel;
SerializedProperty bt = serializedBrain.FindProperty("brainType");
EditorGUILayout.PropertyField(bt);
if (bt.enumValueIndex < 0)
{

171
unity-environment/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity


--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
serializedVersion: 8
serializedVersion: 9
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3

m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 9
serializedVersion: 10
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AtlasSize: 1024
m_AO: 0
m_AOMaxDistance: 1
m_CompAOExponent: 1

m_PVRFilteringAtrousPositionSigmaDirect: 0.5
m_PVRFilteringAtrousPositionSigmaIndirect: 2
m_PVRFilteringAtrousPositionSigmaAO: 1
m_ShowResolutionOverlay: 1
m_LightingDataAsset: {fileID: 0}
m_UseShadowmask: 1
--- !u!196 &4

debug:
m_Flags: 0
m_NavMeshData: {fileID: 0}
--- !u!114 &35309571
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 846768605}
--- !u!1 &282272644
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 282272645}
- component: {fileID: 282272649}
m_Layer: 0
m_Name: Agent
m_Name: BasicAgent
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 260483cdfc6b14e26823a02f23bd8baa, type: 2}
m_StaticBatchInfo:

onDemandDecision: 1
numberOfActionsBetweenDecisions: 1
timeBetweenDecisionsAtInference: 0.15
position: 0
smallGoalPosition: -3
largeGoalPosition: 7
minPosition: -10
maxPosition: 10
--- !u!114 &339558607
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
graphModel: {fileID: 4900000, guid: 8786b6500d406497c959f24c2a8b59ac, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
VectorObservationPlacholderName: vector_observation
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
VisualObservationPlaceholderName: []
ActionPlaceholderName: action
PreviousActionPlaceholderName: prev_action
brain: {fileID: 846768605}
--- !u!114 &703554261
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
brain: {fileID: 0}
--- !u!1 &762086410
GameObject:
m_ObjectHideFlags: 0

-
-
vectorActionSpaceType: 0
vectorObservationSpaceType: 0
- {fileID: 1458832067}
- {fileID: 1183791066}
- {fileID: 1066285776}
- {fileID: 977008778}
instanceID: 21298
--- !u!114 &977008778
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8b23992c8eb17439887f5e944bf04a40, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
graphModel: {fileID: 4900000, guid: 8786b6500d406497c959f24c2a8b59ac, type: 3}
graphScope:
graphPlaceholders: []
BatchSizePlaceholderName: batch_size
VectorObservationPlacholderName: vector_observation
RecurrentInPlaceholderName: recurrent_in
RecurrentOutPlaceholderName: recurrent_out
VisualObservationPlaceholderName: []
ActionPlaceholderName: action
PreviousActionPlaceholderName: prev_action
brain: {fileID: 846768605}
- {fileID: 1962229171}
- {fileID: 703554261}
- {fileID: 35309571}
- {fileID: 339558607}
instanceID: 14244
--- !u!1 &984725368
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 984725370}
- component: {fileID: 984725369}
m_Layer: 0
m_Name: largeGoal
m_Name: LargeGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:

m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1066285776
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
brain: {fileID: 846768605}
--- !u!1 &1178588871
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 1178588873}
- component: {fileID: 1178588872}
m_Layer: 0
m_Name: smallGoal
m_Name: SmallGoal
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 4294967295
m_Materials:
- {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2}
m_StaticBatchInfo:

m_Father: {fileID: 0}
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1183791066
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
brain: {fileID: 0}
--- !u!114 &1458832067
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
continuousPlayerActions: []
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
defaultAction: -1
brain: {fileID: 846768605}
--- !u!1 &1574236047
GameObject:
m_ObjectHideFlags: 0

m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1962229171
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3}
m_Name: (Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)(Clone)
m_EditorClassIdentifier:
broadcast: 1
keyContinuousPlayerActions: []
axisContinuousPlayerActions: []
discretePlayerActions:
- key: 97
value: 0
- key: 100
value: 1
defaultAction: -1
brain: {fileID: 846768605}

28
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


private BasicAcademy academy;
public float timeBetweenDecisionsAtInference;
private float timeSinceDecision;
public int position;
public int smallGoalPosition;
public int largeGoalPosition;
int position;
int smallGoalPosition;
int largeGoalPosition;
public int minPosition;
public int maxPosition;
int minPosition;
int maxPosition;
public override void InitializeAgent()
{

public override void CollectObservations()
{
AddVectorObs(position);
AddVectorObs(position, 20);
}
public override void AgentAction(float[] vectorAction, string textAction)

if (position < minPosition) { position = minPosition; }
if (position > maxPosition) { position = maxPosition; }
gameObject.transform.position = new Vector3(position, 0f, 0f);
gameObject.transform.position = new Vector3(position - 10f, 0f, 0f);
AddReward(-0.01f);

public override void AgentReset()
{
position = 0;
minPosition = -10;
maxPosition = 10;
smallGoalPosition = -3;
largeGoalPosition = 7;
smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
position = 10;
minPosition = 0;
maxPosition = 20;
smallGoalPosition = 7;
largeGoalPosition = 17;
smallGoal.transform.position = new Vector3(smallGoalPosition - 10f, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition - 10f, 0f, 0f);
}
public override void AgentOnDone()
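Note: with discrete observations removed, BasicAgent reports its integer position through the new AddVectorObs(int, int) overload, which appends a one-hot vector of the given range (20 here). A numpy sketch of the equivalent expansion (the helper name is mine; only the range of 20 comes from the diff):

import numpy as np

def one_hot_obs(value, value_range):
    # Same effect as AddVectorObs(value, value_range): a one-hot float vector.
    vec = np.zeros(value_range, dtype=np.float32)
    vec[value] = 1.0
    return vec

print(one_hot_obs(3, 20))   # 20 floats with 1.0 at index 3, like AddVectorObs(3, 20)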

165
unity-environment/Assets/ML-Agents/Examples/Basic/TFModels/Basic.bytes


(Binary TensorFlow graph data for Basic.bytes. The readable fragments name graph nodes such as vector_observation, OneHotEncoding/*, main_graph_0/hidden_0/*, dense*/MatMul, action_probs, and value_estimate; the raw serialized byte-level diff is omitted here.)

2
unity-environment/Assets/ML-Agents/Examples/Basic/TFModels/Basic.bytes.meta


fileFormatVersion: 2
guid: 8786b6500d406497c959f24c2a8b59ac
timeCreated: 1523662030
licenseType: Free
TextScriptImporter:
externalObjects: {}
userData:

78
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


action.textActions = "";
info.memories = new List<float>();
action.memories = new List<float>();
if (param.vectorObservationSpaceType == SpaceType.continuous)
{
info.vectorObservation =
new List<float>(param.vectorObservationSize);
info.stackedVectorObservation =
new List<float>(param.vectorObservationSize
* brain.brainParameters.numStackedVectorObservations);
info.stackedVectorObservation.AddRange(
new float[param.vectorObservationSize
* param.numStackedVectorObservations]);
}
else
{
info.vectorObservation = new List<float>(1);
info.stackedVectorObservation =
new List<float>(param.numStackedVectorObservations);
info.stackedVectorObservation.AddRange(
new float[param.numStackedVectorObservations]);
}
info.vectorObservation =
new List<float>(param.vectorObservationSize);
info.stackedVectorObservation =
new List<float>(param.vectorObservationSize
* brain.brainParameters.numStackedVectorObservations);
info.stackedVectorObservation.AddRange(
new float[param.vectorObservationSize
* param.numStackedVectorObservations]);
info.visualObservations = new List<Texture2D>();
}

CollectObservations();
BrainParameters param = brain.brainParameters;
if (param.vectorObservationSpaceType == SpaceType.continuous)
if (info.vectorObservation.Count != param.vectorObservationSize)
if (info.vectorObservation.Count != param.vectorObservationSize)
{
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch between continuous " +
"agent {0} and brain {1}. " +
"Was Expecting {2} but received {3}. ",
gameObject.name, brain.gameObject.name,
brain.brainParameters.vectorObservationSize,
info.vectorObservation.Count));
}
info.stackedVectorObservation.RemoveRange(
0, param.vectorObservationSize);
info.stackedVectorObservation.AddRange(info.vectorObservation);
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch between continuous " +
"agent {0} and brain {1}. " +
"Was Expecting {2} but received {3}. ",
gameObject.name, brain.gameObject.name,
brain.brainParameters.vectorObservationSize,
info.vectorObservation.Count));
else
{
if (info.vectorObservation.Count != 1)
{
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch between discrete agent" +
" {0} and brain {1}. Was Expecting {2} but received {3}. ",
gameObject.name, brain.gameObject.name,
1, info.vectorObservation.Count));
}
info.stackedVectorObservation.RemoveRange(0, 1);
info.stackedVectorObservation.AddRange(info.vectorObservation);
}
info.stackedVectorObservation.RemoveRange(
0, param.vectorObservationSize);
info.stackedVectorObservation.AddRange(info.vectorObservation);
info.visualObservations.Clear();
if (param.cameraResolutions.Length > agentParameters.agentCameras.Count)

/// - <see cref="AddVectorObs(float[])"/>
/// - <see cref="AddVectorObs(List{float})"/>
/// - <see cref="AddVectorObs(Quaternion)"/>
/// - <see cref="AddVectorObs(bool)"/>
/// - <see cref="AddVectorObs(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation

/// <param name="observation">Observation.</param>
protected void AddVectorObs(int observation)
{
info.vectorObservation.Add((float) observation);
info.vectorObservation.Add(observation);
}
/// <summary>

protected void AddVectorObs(bool observation)
{
info.vectorObservation.Add(observation ? 1f : 0f);
}
protected void AddVectorObs(int observation, int range)
{
float[] oneHotVector = new float[range];
oneHotVector[observation] = 1;
info.vectorObservation.AddRange(oneHotVector);
}
/// <summary>

/// The agent must set maxStepReached.</param>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
{
if (academyDone)

2
unity-environment/Assets/ML-Agents/Scripts/Batcher.cs


VectorActionSize = brainParameters.vectorActionSize,
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)brainParameters.vectorActionSpaceType,
VectorObservationSpaceType =
(CommunicatorObjects.SpaceTypeProto)brainParameters.vectorObservationSpaceType,
BrainName = name,
BrainType = type
};

22
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


public SpaceType vectorActionSpaceType = SpaceType.discrete;
/**< \brief Defines if the action is discrete or continuous */
public SpaceType vectorObservationSpaceType = SpaceType.continuous;
/**< \brief Defines if the state is discrete or continuous */
}
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +

*/
public class Brain : MonoBehaviour
{
private bool isInitialized = false;
private bool isInitialized;
private Dictionary<Agent, AgentInfo> agentInfos =
new Dictionary<Agent, AgentInfo>(1024);

public BrainType brainType;
//[HideInInspector]
///**< \brief Keeps track of the agents which subscribe to this brain*/
/// Keeps track of the agents which subscribe to this brain*/
// public Dictionary<int, Agent> agents = new Dictionary<int, Agent>();
[SerializeField] ScriptableObject[] CoreBrains;

{
CoreBrains[(int) bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
"CoreBrain" + bt);
CoreBrains[(int) bt] =
ScriptableObject.Instantiate(CoreBrains[(int) bt]);
CoreBrains[(int) bt] = Instantiate(CoreBrains[(int) bt]);
}
}

if (!gameObject.activeSelf)
{
throw new UnityAgentsException(
string.Format("Agent {0} tried to request an action " +
"from brain {1} but it is not active.",
agent.gameObject.name, gameObject.name));
$"Agent {agent.gameObject.name} tried to request an action " +
$"from brain {gameObject.name} but it is not active.");
string.Format("Agent {0} tried to request an action " +
"from brain {1} but it was not initialized.",
agent.gameObject.name, gameObject.name));
$"Agent {agent.gameObject.name} tried to request an action " +
$"from brain {gameObject.name} but it was not initialized.");
}
else
{

2
unity-environment/Assets/ML-Agents/Scripts/CoreBrain.cs


void SetBrain(Brain b);
/// Implement this method to initialize CoreBrain
void InitializeCoreBrain(MLAgents.Batcher brainBatcher);
void InitializeCoreBrain(Batcher brainBatcher);
/// Implement this method to define the logic for deciding actions
void DecideAction(Dictionary<Agent, AgentInfo> agentInfo);

13
unity-environment/Assets/ML-Agents/Scripts/CoreBrainExternal.cs


/**< Reference to the brain that uses this CoreBrainExternal */
public Brain brain;
MLAgents.Batcher brainBatcher;
Batcher brainBatcher;
/// Creates the reference to the brain
public void SetBrain(Brain b)

/// Generates the communicator for the Academy if none was present and
/// subscribe to ExternalCommunicator if it was present.
public void InitializeCoreBrain(MLAgents.Batcher brainBatcher)
public void InitializeCoreBrain(Batcher brainBatcher)
throw new UnityAgentsException(string.Format("The brain {0} was set to" +
" External mode" +
" but Unity was unable to read the" +
" arguments passed at launch.",
brain.gameObject.name));
throw new UnityAgentsException($"The brain {brain.gameObject.name} was set to" + " External mode" +
" but Unity was unable to read the" + " arguments passed at launch.");
}
else
{

{
brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
}
return;
}
/// Nothing needs to appear in the inspector

9
unity-environment/Assets/ML-Agents/Scripts/CoreBrainHeuristic.cs


/**< Reference to the brain that uses this CoreBrainHeuristic */
public Brain brain;
MLAgents.Batcher brainBatcher;
Batcher brainBatcher;
/**< Reference to the Decision component used to decide the actions */
public Decision decision;

}
/// Create the reference to decision
public void InitializeCoreBrain(MLAgents.Batcher brainBatcher)
public void InitializeCoreBrain(Batcher brainBatcher)
{
decision = brain.gameObject.GetComponent<Decision>();

/// Uses the Decision Component to decide that action to take
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
if (brainBatcher != null)
{
brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
}
brainBatcher?.SendBrainInfo(brain.gameObject.name, agentInfo);
if (decision == null)
{

649
unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
#endif
namespace MLAgents

{
[SerializeField]
[Tooltip("If checked, the brain will broadcast states and actions to Python.")]
[SerializeField] [Tooltip("If checked, the brain will broadcast states and actions to Python.")]
#pragma warning disable
private bool broadcast = true;
#pragma warning restore

{
public enum tensorType
public enum TensorType
{
Integer,
FloatingPoint

public tensorType valueType;
public TensorType valueType;
MLAgents.Batcher brainBatcher;
Batcher brainBatcher;
[Tooltip("This must be the bytes file corresponding to the pretrained TensorFlow graph.")]
/// Modify only in inspector : Reference to the Graph asset

/// Modify only in inspector : Name of the previous action node
public string PreviousActionPlaceholderName = "prev_action";
#if ENABLE_TENSORFLOW
TFGraph graph;
TFSession session;
bool hasRecurrent;
bool hasState;
bool hasBatchSize;
bool hasPrevAction;
float[,] inputState;
int[] inputPrevAction;
List<float[,,,]> observationMatrixList;
float[,] inputOldMemories;
List<Texture2D> texturesHolder;
int memorySize;
TFGraph graph;
TFSession session;
bool hasRecurrent;
bool hasState;
bool hasBatchSize;
bool hasPrevAction;
float[,] inputState;
int[] inputPrevAction;
List<float[,,,]> observationMatrixList;
float[,] inputOldMemories;
List<Texture2D> texturesHolder;
int memorySize;
#endif
/// Reference to the brain that uses this CoreBrainInternal

{
#if ENABLE_TENSORFLOW
#if UNITY_ANDROID
// This needs to ba called only once and will raise an exception if
// there are multiple internal brains
// This needs to ba called only once and will raise an exception if
// there are multiple internal brains
try{
TensorFlowSharp.Android.NativeBinding.Init();
}

#endif
if ((brainBatcher == null)
|| (!broadcast))
{
this.brainBatcher = null;
}
else
{
this.brainBatcher = brainBatcher;
this.brainBatcher.SubscribeBrain(brain.gameObject.name);
}
if ((brainBatcher == null)
|| (!broadcast))
{
this.brainBatcher = null;
}
else
{
this.brainBatcher = brainBatcher;
this.brainBatcher.SubscribeBrain(brain.gameObject.name);
}
if (graphModel != null)
{
graph = new TFGraph();
graph.Import(graphModel.bytes);
session = new TFSession(graph);
if (graphModel != null)
{
// TODO: Make this a loop over a dynamic set of graph inputs
graph = new TFGraph();
if ((graphScope.Length > 1) && (graphScope[graphScope.Length - 1] != '/'))
{
graphScope = graphScope + '/';
}
graph.Import(graphModel.bytes);
if (graph[graphScope + BatchSizePlaceholderName] != null)
{
hasBatchSize = true;
}
session = new TFSession(graph);
if ((graph[graphScope + RecurrentInPlaceholderName] != null) &&
(graph[graphScope + RecurrentOutPlaceholderName] != null))
{
hasRecurrent = true;
var runner = session.GetRunner();
runner.Fetch(graph[graphScope + "memory_size"][0]);
var networkOutput = runner.Run()[0].GetValue();
memorySize = (int) networkOutput;
}
// TODO: Make this a loop over a dynamic set of graph inputs
if (graph[graphScope + VectorObservationPlacholderName] != null)
{
hasState = true;
}
if ((graphScope.Length > 1) && (graphScope[graphScope.Length - 1] != '/'))
{
graphScope = graphScope + '/';
if (graph[graphScope + PreviousActionPlaceholderName] != null)
{
hasPrevAction = true;
}
if (graph[graphScope + BatchSizePlaceholderName] != null)
{
hasBatchSize = true;
}
if ((graph[graphScope + RecurrentInPlaceholderName] != null) && (graph[graphScope + RecurrentOutPlaceholderName] != null))
{
hasRecurrent = true;
var runner = session.GetRunner();
runner.Fetch(graph[graphScope + "memory_size"][0]);
var networkOutput = runner.Run()[0].GetValue();
memorySize = (int)networkOutput;
}
if (graph[graphScope + VectorObservationPlacholderName] != null)
{
hasState = true;
}
if (graph[graphScope + PreviousActionPlaceholderName] != null)
{
hasPrevAction = true;
}
}
observationMatrixList = new List<float[,,,]>();
texturesHolder = new List<Texture2D>();
observationMatrixList = new List<float[,,,]>();
texturesHolder = new List<Texture2D>();
/// Uses the stored information to run the tensorflow graph and generate

#if ENABLE_TENSORFLOW
if (brainBatcher != null)
{
brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
}
int currentBatchSize = agentInfo.Count();
List<Agent> agentList = agentInfo.Keys.ToList();
if (currentBatchSize == 0)
{
return;
}
if (brainBatcher != null)
{
brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
}
int currentBatchSize = agentInfo.Count();
List<Agent> agentList = agentInfo.Keys.ToList();
if (currentBatchSize == 0)
{
return;
}
// Create the state tensor
if (hasState)
{
int stateLength = 1;
if (brain.brainParameters.vectorObservationSpaceType == SpaceType.continuous)
// Create the state tensor
if (hasState)
int stateLength = 1;
}
inputState =
new float[currentBatchSize, stateLength * brain.brainParameters.numStackedVectorObservations];
inputState =
new float[currentBatchSize, stateLength * brain.brainParameters.numStackedVectorObservations];
var i = 0;
foreach (Agent agent in agentList)
{
List<float> state_list = agentInfo[agent].stackedVectorObservation;
for (int j =
0; j < stateLength * brain.brainParameters.numStackedVectorObservations; j++)
var i = 0;
foreach (Agent agent in agentList)
inputState[i, j] = state_list[j];
List<float> stateList = agentInfo[agent].stackedVectorObservation;
for (int j =
0;
j < stateLength * brain.brainParameters.numStackedVectorObservations;
j++)
{
inputState[i, j] = stateList[j];
}
i++;
i++;
}
// Create the state tensor
if (hasPrevAction)
{
inputPrevAction = new int[currentBatchSize];
var i = 0;
foreach (Agent agent in agentList)
// Create the state tensor
if (hasPrevAction)
float[] action_list = agentInfo[agent].storedVectorActions;
inputPrevAction[i] = Mathf.FloorToInt(action_list[0]);
i++;
inputPrevAction = new int[currentBatchSize];
var i = 0;
foreach (Agent agent in agentList)
{
float[] actionList = agentInfo[agent].storedVectorActions;
inputPrevAction[i] = Mathf.FloorToInt(actionList[0]);
i++;
}
}
observationMatrixList.Clear();
for (int observationIndex =
0; observationIndex < brain.brainParameters.cameraResolutions.Count(); observationIndex++){
texturesHolder.Clear();
foreach (Agent agent in agentList){
texturesHolder.Add(agentInfo[agent].visualObservations[observationIndex]);
}
observationMatrixList.Add(
BatchVisualObservations(texturesHolder, brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
}
// Create the recurrent tensor
if (hasRecurrent)
{
// Need to have variable memory size
inputOldMemories = new float[currentBatchSize, memorySize];
var i = 0;
foreach (Agent agent in agentList)
observationMatrixList.Clear();
for (int observationIndex =
0;
observationIndex < brain.brainParameters.cameraResolutions.Length;
observationIndex++)
float[] m = agentInfo[agent].memories.ToArray();
for (int j = 0; j < m.Count(); j++)
texturesHolder.Clear();
foreach (Agent agent in agentList)
inputOldMemories[i, j] = m[j];
texturesHolder.Add(agentInfo[agent].visualObservations[observationIndex]);
i++;
observationMatrixList.Add(
BatchVisualObservations(texturesHolder,
brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
}
// Create the recurrent tensor
if (hasRecurrent)
{
// Need to have variable memory size
inputOldMemories = new float[currentBatchSize, memorySize];
var i = 0;
foreach (Agent agent in agentList)
{
float[] m = agentInfo[agent].memories.ToArray();
for (int j = 0; j < m.Length; j++)
{
inputOldMemories[i, j] = m[j];
}
var runner = session.GetRunner();
try
{
runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
}
catch
{
throw new UnityAgentsException(string.Format(@"The node {0} could not be found. Please make sure the graphScope {1} is correct",
graphScope + ActionPlaceholderName, graphScope));
}
i++;
}
}
if (hasBatchSize)
{
runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0], new int[] { currentBatchSize });
}
foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
{
var runner = session.GetRunner();
if (placeholder.valueType == TensorFlowAgentPlaceholder.tensorType.FloatingPoint)
{
runner.AddInput(graph[graphScope + placeholder.name][0], new float[] { Random.Range(placeholder.minValue, placeholder.maxValue) });
}
else if (placeholder.valueType == TensorFlowAgentPlaceholder.tensorType.Integer)
{
runner.AddInput(graph[graphScope + placeholder.name][0], new int[] { Random.Range((int)placeholder.minValue, (int)placeholder.maxValue + 1) });
}
runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
throw new UnityAgentsException(string.Format(@"One of the Tensorflow placeholder cound nout be found.
In brain {0}, there are no {1} placeholder named {2}.",
brain.gameObject.name, placeholder.valueType.ToString(), graphScope + placeholder.name));
throw new UnityAgentsException(string.Format(
@"The node {0} could not be found. Please make sure the graphScope {1} is correct",
graphScope + ActionPlaceholderName, graphScope));
}
// Create the state tensor
if (hasState)
{
if (brain.brainParameters.vectorObservationSpaceType == SpaceType.discrete)
if (hasBatchSize)
var discreteInputState = new int[currentBatchSize, 1];
for (int i = 0; i < currentBatchSize; i++)
runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0], new int[] {currentBatchSize});
}
foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
{
try
discreteInputState[i, 0] = (int)inputState[i, 0];
if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.FloatingPoint)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
new float[] {Random.Range(placeholder.minValue, placeholder.maxValue)});
}
else if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.Integer)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
new int[] {Random.Range((int) placeholder.minValue, (int) placeholder.maxValue + 1)});
}
runner.AddInput(graph[graphScope + VectorObservationPlacholderName][0], discreteInputState);
catch
{
throw new UnityAgentsException(string.Format(
@"One of the Tensorflow placeholder cound nout be found.
In brain {0}, there are no {1} placeholder named {2}.",
brain.gameObject.name, placeholder.valueType.ToString(), graphScope + placeholder.name));
}
else
// Create the state tensor
if (hasState)
}
// Create the previous action tensor
if (hasPrevAction)
{
runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
}
// Create the previous action tensor
if (hasPrevAction)
{
runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
}
// Create the observation tensors
for (int obs_number =
0; obs_number < brain.brainParameters.cameraResolutions.Length; obs_number++)
{
runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obs_number]][0], observationMatrixList[obs_number]);
}
// Create the observation tensors
for (int obsNumber =
0;
obsNumber < brain.brainParameters.cameraResolutions.Length;
obsNumber++)
{
runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obsNumber]][0],
observationMatrixList[obsNumber]);
}
if (hasRecurrent)
{
runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
}
if (hasRecurrent)
{
runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
}
TFTensor[] networkOutput;
try
{
networkOutput = runner.Run();
}
catch (TFException e)
{
string errorMessage = e.Message;
TFTensor[] networkOutput;
errorMessage =
string.Format(@"The tensorflow graph needs an input for {0} of type {1}",
e.Message.Split(new string[] { "Node: " }, 0)[1].Split('=')[0],
e.Message.Split(new string[] { "dtype=" }, 0)[1].Split(',')[0]);
networkOutput = runner.Run();
finally
catch (TFException e)
throw new UnityAgentsException(errorMessage);
string errorMessage = e.Message;
try
{
errorMessage =
$@"The tensorflow graph needs an input for {e.Message.Split(new string[] {"Node: "}, 0)[1].Split('=')[0]} of type {e.Message.Split(new string[] {"dtype="}, 0)[1].Split(',')[0]}";
}
finally
{
throw new UnityAgentsException(errorMessage);
}
}
// Create the recurrent tensor
if (hasRecurrent)
{
float[,] recurrent_tensor = networkOutput[1].GetValue() as float[,];
var i = 0;
foreach (Agent agent in agentList)
// Create the recurrent tensor
if (hasRecurrent)
var m = new float[memorySize];
for (int j = 0; j < memorySize; j++)
float[,] recurrentTensor = networkOutput[1].GetValue() as float[,];
var i = 0;
foreach (Agent agent in agentList)
m[j] = recurrent_tensor[i, j];
var m = new float[memorySize];
for (int j = 0; j < memorySize; j++)
{
m[j] = recurrentTensor[i, j];
}
agent.UpdateMemoriesAction(m.ToList());
i++;
agent.UpdateMemoriesAction(m.ToList());
i++;
}
if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
{
var output = networkOutput[0].GetValue() as float[,];
var i = 0;
foreach (Agent agent in agentList)
if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
var a = new float[brain.brainParameters.vectorActionSize];
for (int j = 0; j < brain.brainParameters.vectorActionSize; j++)
var output = networkOutput[0].GetValue() as float[,];
var i = 0;
foreach (Agent agent in agentList)
a[j] = output[i, j];
var a = new float[brain.brainParameters.vectorActionSize];
for (int j = 0; j < brain.brainParameters.vectorActionSize; j++)
{
a[j] = output[i, j];
}
agent.UpdateVectorAction(a);
i++;
agent.UpdateVectorAction(a);
i++;
}
else if (brain.brainParameters.vectorActionSpaceType == SpaceType.discrete)
{
long[,] output = networkOutput[0].GetValue() as long[,];
var i = 0;
foreach (Agent agent in agentList)
else if (brain.brainParameters.vectorActionSpaceType == SpaceType.discrete)
var a = new float[1] { (float)(output[i, 0]) };
agent.UpdateVectorAction(a);
i++;
long[,] output = networkOutput[0].GetValue() as long[,];
var i = 0;
foreach (Agent agent in agentList)
{
var a = new float[1] {(float) (output[i, 0])};
agent.UpdateVectorAction(a);
i++;
}
}
#else

public void OnInspector()
{
#if ENABLE_TENSORFLOW && UNITY_EDITOR
EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
broadcast = EditorGUILayout.Toggle(new GUIContent("Broadcast",
"If checked, the brain will broadcast states and actions to Python."), broadcast);
EditorGUILayout.LabelField("", GUI.skin.horizontalSlider);
broadcast = EditorGUILayout.Toggle(new GUIContent("Broadcast",
"If checked, the brain will broadcast states and actions to Python."), broadcast);
var serializedBrain = new SerializedObject(this);
GUILayout.Label("Edit the Tensorflow graph parameters here");
var tfGraphModel = serializedBrain.FindProperty("graphModel");
serializedBrain.Update();
EditorGUILayout.ObjectField(tfGraphModel);
serializedBrain.ApplyModifiedProperties();
var serializedBrain = new SerializedObject(this);
GUILayout.Label("Edit the Tensorflow graph parameters here");
var tfGraphModel = serializedBrain.FindProperty("graphModel");
serializedBrain.Update();
EditorGUILayout.ObjectField(tfGraphModel);
serializedBrain.ApplyModifiedProperties();
if (graphModel == null)
{
EditorGUILayout.HelpBox("Please provide a tensorflow graph as a bytes file.", MessageType.Error);
}
if (graphModel == null)
{
EditorGUILayout.HelpBox("Please provide a tensorflow graph as a bytes file.", MessageType.Error);
}
graphScope =
EditorGUILayout.TextField(new GUIContent("Graph Scope", "If you set a scope while training your tensorflow model, " +
"all your placeholder name will have a prefix. You must specify that prefix here."), graphScope);
graphScope =
EditorGUILayout.TextField(new GUIContent("Graph Scope",
"If you set a scope while training your tensorflow model, " +
"all your placeholder name will have a prefix. You must specify that prefix here."), graphScope);
if (BatchSizePlaceholderName == "")
{
BatchSizePlaceholderName = "batch_size";
}
if (BatchSizePlaceholderName == "")
{
BatchSizePlaceholderName = "batch_size";
}
BatchSizePlaceholderName =
EditorGUILayout.TextField(new GUIContent("Batch Size Node Name", "If the batch size is one of " +
"the inputs of your graph, you must specify the name if the placeholder here."), BatchSizePlaceholderName);
if (VectorObservationPlacholderName == "")
{
VectorObservationPlacholderName = "state";
}
VectorObservationPlacholderName =
EditorGUILayout.TextField(new GUIContent("Vector Observation Node Name", "If your graph uses the state as an input, " +
"you must specify the name if the placeholder here."), VectorObservationPlacholderName);
if (RecurrentInPlaceholderName == "")
{
RecurrentInPlaceholderName = "recurrent_in";
}
RecurrentInPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Recurrent Input Node Name", "If your graph uses a " +
"recurrent input / memory as input and outputs new recurrent input / memory, " +
"you must specify the name if the input placeholder here."), RecurrentInPlaceholderName);
if (RecurrentOutPlaceholderName == "")
{
RecurrentOutPlaceholderName = "recurrent_out";
}
RecurrentOutPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Recurrent Output Node Name", " If your graph uses a " +
"recurrent input / memory as input and outputs new recurrent input / memory, you must specify the name if " +
"the output placeholder here."), RecurrentOutPlaceholderName);
BatchSizePlaceholderName =
EditorGUILayout.TextField(new GUIContent("Batch Size Node Name", "If the batch size is one of " +
"the inputs of your graph, you must specify the name if the placeholder here."),
BatchSizePlaceholderName);
if (VectorObservationPlacholderName == "")
{
VectorObservationPlacholderName = "state";
}
if (brain.brainParameters.cameraResolutions != null)
{
if (brain.brainParameters.cameraResolutions.Count() > 0)
VectorObservationPlacholderName =
EditorGUILayout.TextField(new GUIContent("Vector Observation Node Name",
"If your graph uses the state as an input, " +
"you must specify the name if the placeholder here."), VectorObservationPlacholderName);
if (RecurrentInPlaceholderName == "")
if (VisualObservationPlaceholderName == null)
{
VisualObservationPlaceholderName =
new string[brain.brainParameters.cameraResolutions.Count()];
}
if (VisualObservationPlaceholderName.Count() != brain.brainParameters.cameraResolutions.Count())
{
VisualObservationPlaceholderName =
new string[brain.brainParameters.cameraResolutions.Count()];
}
for (int obs_number =
0; obs_number < brain.brainParameters.cameraResolutions.Count(); obs_number++)
RecurrentInPlaceholderName = "recurrent_in";
}
RecurrentInPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Recurrent Input Node Name", "If your graph uses a " +
"recurrent input / memory as input and outputs new recurrent input / memory, " +
"you must specify the name if the input placeholder here."),
RecurrentInPlaceholderName);
if (RecurrentOutPlaceholderName == "")
{
RecurrentOutPlaceholderName = "recurrent_out";
}
RecurrentOutPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Recurrent Output Node Name", " If your graph uses a " +
"recurrent input / memory as input and outputs new recurrent input / memory, you must specify the name if " +
"the output placeholder here."),
RecurrentOutPlaceholderName);
if (brain.brainParameters.cameraResolutions != null)
{
if (brain.brainParameters.cameraResolutions.Count() > 0)
if ((VisualObservationPlaceholderName[obs_number] == "") || (VisualObservationPlaceholderName[obs_number] == null))
if (VisualObservationPlaceholderName == null)
VisualObservationPlaceholderName =
new string[brain.brainParameters.cameraResolutions.Count()];
}
VisualObservationPlaceholderName[obs_number] =
"visual_observation_" + obs_number;
if (VisualObservationPlaceholderName.Count() != brain.brainParameters.cameraResolutions.Count())
{
VisualObservationPlaceholderName =
new string[brain.brainParameters.cameraResolutions.Count()];
for (int obs_number =
0;
obs_number < brain.brainParameters.cameraResolutions.Count();
obs_number++)
{
if ((VisualObservationPlaceholderName[obs_number] == "") ||
(VisualObservationPlaceholderName[obs_number] == null))
{
VisualObservationPlaceholderName[obs_number] =
"visual_observation_" + obs_number;
}
}
var opn = serializedBrain.FindProperty("VisualObservationPlaceholderName");
serializedBrain.Update();
EditorGUILayout.PropertyField(opn, true);
serializedBrain.ApplyModifiedProperties();
var opn = serializedBrain.FindProperty("VisualObservationPlaceholderName");
serializedBrain.Update();
EditorGUILayout.PropertyField(opn, true);
serializedBrain.ApplyModifiedProperties();
}
if (ActionPlaceholderName == "")
{
ActionPlaceholderName = "action";
}
ActionPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Action Node Name", "Specify the name of the " +
"placeholder corresponding to the actions of the brain in your graph. If the action space type is " +
"continuous, the output must be a one dimensional tensor of float of length Action Space Size, " +
"if the action space type is discrete, the output must be a one dimensional tensor of int " +
"of length 1."), ActionPlaceholderName);
if (ActionPlaceholderName == "")
{
ActionPlaceholderName = "action";
}
ActionPlaceholderName =
EditorGUILayout.TextField(new GUIContent("Action Node Name", "Specify the name of the " +
"placeholder corresponding to the actions of the brain in your graph. If the action space type is " +
"continuous, the output must be a one dimensional tensor of float of length Action Space Size, " +
"if the action space type is discrete, the output must be a one dimensional tensor of int " +
"of length 1."), ActionPlaceholderName);
var tfPlaceholders = serializedBrain.FindProperty("graphPlaceholders");
serializedBrain.Update();
EditorGUILayout.PropertyField(tfPlaceholders, true);
serializedBrain.ApplyModifiedProperties();
var tfPlaceholders = serializedBrain.FindProperty("graphPlaceholders");
serializedBrain.Update();
EditorGUILayout.PropertyField(tfPlaceholders, true);
serializedBrain.ApplyModifiedProperties();
#endif
#if !ENABLE_TENSORFLOW && UNITY_EDITOR
EditorGUILayout.HelpBox(

return result;
}
}
}

2
unity-environment/Assets/ML-Agents/Scripts/RpcCommunicator.cs


/// <param name="communicatorParameters">Communicator parameters.</param>
public RPCCommunicator(CommunicatorParameters communicatorParameters)
{
this.m_communicatorParameters = communicatorParameters;
m_communicatorParameters = communicatorParameters;
}
/// <summary>

2
unity-environment/Assets/ML-Agents/Scripts/UnityAgentsException.cs


namespace MLAgents
{
[System.Serializable]
[Serializable]
/// Contains exceptions specific to ML-Agents.
public class UnityAgentsException : System.Exception
{
